diff mbox series

[v38,21/24] x86/vdso: Implement a vDSO for Intel SGX enclave call

Message ID 20200915112842.897265-22-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Intel SGX foundations | expand

Commit Message

Jarkko Sakkinen Sept. 15, 2020, 11:28 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

An SGX runtime must be aware of the exceptions, which happen inside an
enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
the CPU exception back to the caller exactly when it happens.

Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
vDSO handler fills this information to the user provided buffer or
alternatively trigger user provided callback at the time of the exception.

The calling convention is custom and does not follow System V x86-64 ABI.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Cedric Xing <cedric.xing@intel.com>
Signed-off-by: Cedric Xing <cedric.xing@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/entry/vdso/Makefile             |   2 +
 arch/x86/entry/vdso/vdso.lds.S           |   1 +
 arch/x86/entry/vdso/vsgx_enter_enclave.S | 157 +++++++++++++++++++++++
 arch/x86/include/asm/enclu.h             |   8 ++
 arch/x86/include/uapi/asm/sgx.h          | 128 ++++++++++++++++++
 5 files changed, 296 insertions(+)
 create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
 create mode 100644 arch/x86/include/asm/enclu.h

Comments

Borislav Petkov Sept. 24, 2020, 6:04 p.m. UTC | #1
On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote:
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> new file mode 100644
> index 000000000000..adbd59d41517
> --- /dev/null
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S

Why not simply

arch/x86/entry/vdso/sgx.S

?

> @@ -0,0 +1,157 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/linkage.h>
> +#include <asm/export.h>
> +#include <asm/errno.h>
> +#include <asm/enclu.h>
> +
> +#include "extable.h"
> +
> +/* Offset of 'struct sgx_enclave_run' relative to %rbp. */
> +#define SGX_ENCLAVE_RUN_PTR	2*8
> +
> +/* Offsets into 'struct sgx_enclave_run'. */
> +#define SGX_ENCLAVE_RUN_TSC		0*8
> +#define SGX_ENCLAVE_RUN_FLAGS		1*8
> +#define SGX_ENCLAVE_RUN_EXIT_REASON	1*8 + 4
> +#define SGX_ENCLAVE_RUN_USER_HANDLER	2*8
> +/* #define SGX_ENCLAVE_RUN_USER_DATA	3*8 */

What's with that guy? Left here as documentation showing what's at 3*8
offset?

> +#define SGX_ENCLAVE_RUN_EXCEPTION	4*8
> +
> +#define SGX_SYNCHRONOUS_EXIT		0
> +#define SGX_EXCEPTION_EXIT		1

Those are in ...uapi/asm/sgx.h too. Unify?

> +
> +/* Offsets into sgx_enter_enclave.exception. */
> +#define SGX_EX_LEAF			0*8
> +#define SGX_EX_TRAPNR			0*8+4
> +#define SGX_EX_ERROR_CODE		0*8+6
> +#define SGX_EX_ADDRESS			1*8
> +
> +.code64
> +.section .text, "ax"
> +
> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> +	/* Prolog */
> +	.cfi_startproc

Are the CFI annotations for userspace?

> +	push	%rbp
> +	.cfi_adjust_cfa_offset	8
> +	.cfi_rel_offset		%rbp, 0
> +	mov	%rsp, %rbp
> +	.cfi_def_cfa_register	%rbp
> +	push	%rbx
> +	.cfi_rel_offset		%rbx, -8
> +
> +	mov	%ecx, %eax
> +.Lenter_enclave:
> +	/* EENTER <= leaf <= ERESUME */
> +	cmp	$EENTER, %eax
> +	jb	.Linvalid_input
> +	cmp	$ERESUME, %eax
> +	ja	.Linvalid_input
> +
> +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rcx
> +
> +	/* No flags are currently defined/supported. */
> +	cmpl	$0, SGX_ENCLAVE_RUN_FLAGS(%rcx)
> +	jne	.Linvalid_input
> +
> +	/* Load TCS and AEP */

		TSC

I can see why you would write "TCS" though - there's a thread control
structure thing too in that patch.

> +	mov	SGX_ENCLAVE_RUN_TSC(%rcx), %rbx
> +	lea	.Lasync_exit_pointer(%rip), %rcx
> +
> +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> +.Lasync_exit_pointer:
> +.Lenclu_eenter_eresume:
> +	enclu
> +
> +	/* EEXIT jumps here unless the enclave is doing something fancy. */
> +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
> +
> +	/* Set exit_reason. */
> +	movl	$SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
> +
> +	/* Invoke userspace's exit handler if one was provided. */
> +.Lhandle_exit:
> +	cmpq	$0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx)
> +	jne	.Linvoke_userspace_handler
> +
> +	/* Success, in the sense that ENCLU was attempted. */
> +	xor	%eax, %eax
> +
> +.Lout:
> +	pop	%rbx
> +	leave
> +	.cfi_def_cfa		%rsp, 8
> +	ret
> +
> +	/* The out-of-line code runs with the pre-leave stack frame. */
> +	.cfi_def_cfa		%rbp, 16
> +
> +.Linvalid_input:
> +	mov	$(-EINVAL), %eax
> +	jmp	.Lout
> +
> +.Lhandle_exception:
> +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
> +
> +	/* Set the exit_reason and exception info. */
> +	movl	$SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
> +
> +	mov	%eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx)
> +	mov	%di,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx)
> +	mov	%si,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx)
> +	mov	%rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx)
> +	jmp	.Lhandle_exit
> +
> +.Linvoke_userspace_handler:
> +	/* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> +	mov	%rsp, %rcx
> +
> +	/* Save @e, %rbx is about to be clobbered. */

I've been wondering what that "@e" thing is and I believe I've found it at the
end: "... @e, the optional sgx_enclave_exception struct"

Yes?

Please rewrite what that "e" is here - I think you wanna say "struct
sgx_enclave_run.exception" instead to clarify.

...

> diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h
> new file mode 100644
> index 000000000000..06157b3e9ede
> --- /dev/null
> +++ b/arch/x86/include/asm/enclu.h
> @@ -0,0 +1,8 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_ENCLU_H
> +#define _ASM_X86_ENCLU_H
> +
> +#define EENTER	0x02
> +#define ERESUME	0x03
> +
> +#endif /* _ASM_X86_ENCLU_H */
> diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
> index d0916fb9629e..1564d7f88597 100644
> --- a/arch/x86/include/uapi/asm/sgx.h
> +++ b/arch/x86/include/uapi/asm/sgx.h

Are all those defines being added to the uapi header, also actually
needed by userspace?

> @@ -72,4 +72,132 @@ struct sgx_enclave_provision {
>  	__u64 attribute_fd;
>  };
>  
> +#define SGX_SYNCHRONOUS_EXIT	0
> +#define SGX_EXCEPTION_EXIT	1
> +
> +struct sgx_enclave_run;
> +
> +/**
> + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
> + *					__vdso_sgx_enter_enclave()
> + *
> + * @rdi:	RDI at the time of enclave exit
> + * @rsi:	RSI at the time of enclave exit
> + * @rdx:	RDX at the time of enclave exit
> + * @ursp:	RSP at the time of enclave exit (untrusted stack)
> + * @r8:		R8 at the time of enclave exit
> + * @r9:		R9 at the time of enclave exit

I'm sure you can avoid that "at the time of enclave exit" repetition.

...

> +/**
> + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(),
> + *				      a vDSO function to enter an SGX enclave.
> + *
> + * @rdi:	Pass-through value for RDI
> + * @rsi:	Pass-through value for RSI
> + * @rdx:	Pass-through value for RDX
> + * @leaf:	ENCLU leaf, must be EENTER or ERESUME
> + * @r8:		Pass-through value for R8
> + * @r9:		Pass-through value for R9
> + * @r:		struct sgx_enclave_run, must be non-NULL
> + *
> + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
> + * x86-64 ABI, e.g. doesn't handle XSAVE state.  Except for non-volatile
> + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting
> + * state in accordance with the x86-64 ABI is the responsibility of the enclave
> + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C
> + * code without careful consideration by both the enclave and its runtime.
> + *
> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> + * the enclave.  RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.

That tcs is probably struct sgx_enclave_run.tcs?

> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.

Expand @e and do you mean "@user_handler" with "@handler"?

> + * All other registers are available for use by the enclave and its runtime,
> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> + * pass information to the optional exit handler (see below).
> + *
> + * Most exceptions reported on ENCLU, including those that occur within the
> + * enclave, are fixed up and reported synchronously instead of being delivered
> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> + * never fixed up and are always delivered via standard signals. On synchrously
> + * reported exceptions, -EFAULT is returned and details about the exception are
> + * recorded in @e, the optional sgx_enclave_exception struct.
> + *
> + * If an exit handler is provided, the handler will be invoked on synchronous
> + * exits from the enclave and for all synchronously reported exceptions. In
> + * latter case, @e is filled prior to invoking the handler.
> + *
> + * The exit handler's return value is interpreted as follows:
> + *  >0:		continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> + *   0:		success, return @ret to the caller
> + *  <0:		error, return @ret to the caller
> + *
> + * The exit handler may transfer control, e.g. via longjmp() or C++ exception,
> + * without returning to __vdso_sgx_enter_enclave().
> + *
> + * Return:
> + *  0 on success (ENCLU reached),
> + *  -EINVAL if ENCLU leaf is not allowed,
> + *  -errno for all other negative values returned by the userspace exit handler
> + */
> +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
> +					unsigned long rdx, unsigned int leaf,
> +					unsigned long r8,  unsigned long r9,
> +					struct sgx_enclave_run *r);
> +
>  #endif /* _UAPI_ASM_X86_SGX_H */
> -- 
> 2.25.1
>
Jarkko Sakkinen Sept. 25, 2020, 1 a.m. UTC | #2
On Thu, Sep 24, 2020 at 08:04:07PM +0200, Borislav Petkov wrote:
> On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote:
> > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > new file mode 100644
> > index 000000000000..adbd59d41517
> > --- /dev/null
> > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> 
> Why not simply
> 
> arch/x86/entry/vdso/sgx.S
> 
> ?

I renamed it as vsgx.S (for the sake of convention).

> > @@ -0,0 +1,157 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm/export.h>
> > +#include <asm/errno.h>
> > +#include <asm/enclu.h>
> > +
> > +#include "extable.h"
> > +
> > +/* Offset of 'struct sgx_enclave_run' relative to %rbp. */
> > +#define SGX_ENCLAVE_RUN_PTR	2*8
> > +
> > +/* Offsets into 'struct sgx_enclave_run'. */
> > +#define SGX_ENCLAVE_RUN_TSC		0*8
> > +#define SGX_ENCLAVE_RUN_FLAGS		1*8
> > +#define SGX_ENCLAVE_RUN_EXIT_REASON	1*8 + 4
> > +#define SGX_ENCLAVE_RUN_USER_HANDLER	2*8
> > +/* #define SGX_ENCLAVE_RUN_USER_DATA	3*8 */
> 
> What's with that guy? Left here as documentation showing what's at 3*8
> offset?

Yes.

> > +#define SGX_ENCLAVE_RUN_EXCEPTION	4*8
> > +
> > +#define SGX_SYNCHRONOUS_EXIT		0
> > +#define SGX_EXCEPTION_EXIT		1
> 
> Those are in ...uapi/asm/sgx.h too. Unify?

I have not authored this patch but what I would propose is to use just
raw value in the place of these constants. It is practially just a
boolean value.

I can also add sgx_vdso.h to uapi directory. I just don't see the point.

Holding on for feedback with this one.

> > +
> > +/* Offsets into sgx_enter_enclave.exception. */
> > +#define SGX_EX_LEAF			0*8
> > +#define SGX_EX_TRAPNR			0*8+4
> > +#define SGX_EX_ERROR_CODE		0*8+6
> > +#define SGX_EX_ADDRESS			1*8
> > +
> > +.code64
> > +.section .text, "ax"
> > +
> > +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > +	/* Prolog */
> > +	.cfi_startproc
> 
> Are the CFI annotations for userspace?

Yes, for stack unwinding. However, I would leave them out. Holding on
for further feedback with this tho.

> > +	push	%rbp
> > +	.cfi_adjust_cfa_offset	8
> > +	.cfi_rel_offset		%rbp, 0
> > +	mov	%rsp, %rbp
> > +	.cfi_def_cfa_register	%rbp
> > +	push	%rbx
> > +	.cfi_rel_offset		%rbx, -8
> > +
> > +	mov	%ecx, %eax
> > +.Lenter_enclave:
> > +	/* EENTER <= leaf <= ERESUME */
> > +	cmp	$EENTER, %eax
> > +	jb	.Linvalid_input
> > +	cmp	$ERESUME, %eax
> > +	ja	.Linvalid_input
> > +
> > +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rcx
> > +
> > +	/* No flags are currently defined/supported. */
> > +	cmpl	$0, SGX_ENCLAVE_RUN_FLAGS(%rcx)
> > +	jne	.Linvalid_input
> > +
> > +	/* Load TCS and AEP */
> 
> 		TSC
> 
> I can see why you would write "TCS" though - there's a thread control
> structure thing too in that patch.

Renamed.

> > +	mov	SGX_ENCLAVE_RUN_TSC(%rcx), %rbx
> > +	lea	.Lasync_exit_pointer(%rip), %rcx
> > +
> > +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> > +.Lasync_exit_pointer:
> > +.Lenclu_eenter_eresume:
> > +	enclu
> > +
> > +	/* EEXIT jumps here unless the enclave is doing something fancy. */
> > +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
> > +
> > +	/* Set exit_reason. */
> > +	movl	$SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
> > +
> > +	/* Invoke userspace's exit handler if one was provided. */
> > +.Lhandle_exit:
> > +	cmpq	$0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx)
> > +	jne	.Linvoke_userspace_handler
> > +
> > +	/* Success, in the sense that ENCLU was attempted. */
> > +	xor	%eax, %eax
> > +
> > +.Lout:
> > +	pop	%rbx
> > +	leave
> > +	.cfi_def_cfa		%rsp, 8
> > +	ret
> > +
> > +	/* The out-of-line code runs with the pre-leave stack frame. */
> > +	.cfi_def_cfa		%rbp, 16
> > +
> > +.Linvalid_input:
> > +	mov	$(-EINVAL), %eax
> > +	jmp	.Lout
> > +
> > +.Lhandle_exception:
> > +	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
> > +
> > +	/* Set the exit_reason and exception info. */
> > +	movl	$SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
> > +
> > +	mov	%eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx)
> > +	mov	%di,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx)
> > +	mov	%si,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx)
> > +	mov	%rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx)
> > +	jmp	.Lhandle_exit
> > +
> > +.Linvoke_userspace_handler:
> > +	/* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> > +	mov	%rsp, %rcx
> > +
> > +	/* Save @e, %rbx is about to be clobbered. */
> 
> I've been wondering what that "@e" thing is and I believe I've found it at the
> end: "... @e, the optional sgx_enclave_exception struct"
> 
> Yes?
> 
> Please rewrite what that "e" is here - I think you wanna say "struct
> sgx_enclave_run.exception" instead to clarify.

I agree. Open coded them as "struct sgx_enclave_exception".

> 
> ...
> 
> > diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h
> > new file mode 100644
> > index 000000000000..06157b3e9ede
> > --- /dev/null
> > +++ b/arch/x86/include/asm/enclu.h
> > @@ -0,0 +1,8 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _ASM_X86_ENCLU_H
> > +#define _ASM_X86_ENCLU_H
> > +
> > +#define EENTER	0x02
> > +#define ERESUME	0x03
> > +
> > +#endif /* _ASM_X86_ENCLU_H */
> > diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
> > index d0916fb9629e..1564d7f88597 100644
> > --- a/arch/x86/include/uapi/asm/sgx.h
> > +++ b/arch/x86/include/uapi/asm/sgx.h
> 
> Are all those defines being added to the uapi header, also actually
> needed by userspace?

I'd remove the two constants, as it is clearly just a boolean value.

> > @@ -72,4 +72,132 @@ struct sgx_enclave_provision {
> >  	__u64 attribute_fd;
> >  };
> >  
> > +#define SGX_SYNCHRONOUS_EXIT	0
> > +#define SGX_EXCEPTION_EXIT	1
> > +
> > +struct sgx_enclave_run;
> > +
> > +/**
> > + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
> > + *					__vdso_sgx_enter_enclave()
> > + *
> > + * @rdi:	RDI at the time of enclave exit
> > + * @rsi:	RSI at the time of enclave exit
> > + * @rdx:	RDX at the time of enclave exit
> > + * @ursp:	RSP at the time of enclave exit (untrusted stack)
> > + * @r8:		R8 at the time of enclave exit
> > + * @r9:		R9 at the time of enclave exit
> 
> I'm sure you can avoid that "at the time of enclave exit" repetition.

I edited this as

/**
 * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
 *					__vdso_sgx_enter_enclave()
 * @rdi:	RDI snapshot
 * @rsi:	RSI snapshot
 * @rdx:	RDX snapshot
 * @rsp:	RSP snapshot (untrusted stack)
 * @r8:		R8 snapshot
 * @r9:		R9 snapshot
 * @run:	Pointer to the caller provided struct sgx_enclave_run
 *
 * Return:
 *  0 or negative to exit vDSO
 *  positive to re-enter enclave (must be EENTER or ERESUME leaf)
 */
typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
					  long rsp, long r8, long r9,
					  struct sgx_enclave_run *run);


> > +/**
> > + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(),
> > + *				      a vDSO function to enter an SGX enclave.
> > + *
> > + * @rdi:	Pass-through value for RDI
> > + * @rsi:	Pass-through value for RSI
> > + * @rdx:	Pass-through value for RDX
> > + * @leaf:	ENCLU leaf, must be EENTER or ERESUME
> > + * @r8:		Pass-through value for R8
> > + * @r9:		Pass-through value for R9
> > + * @r:		struct sgx_enclave_run, must be non-NULL
> > + *
> > + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
> > + * x86-64 ABI, e.g. doesn't handle XSAVE state.  Except for non-volatile
> > + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting
> > + * state in accordance with the x86-64 ABI is the responsibility of the enclave
> > + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C
> > + * code without careful consideration by both the enclave and its runtime.
> > + *
> > + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> > + * the enclave.  RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> > + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
> 
> That tcs is probably struct sgx_enclave_run.tcs?

Yes.

> > + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> > + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
> 
> Expand @e and do you mean "@user_handler" with "@handler"?

I replaced this and similar fields as @run.user_handler and so forth.

> 
> > + * All other registers are available for use by the enclave and its runtime,
> > + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> > + * pass information to the optional exit handler (see below).
> > + *
> > + * Most exceptions reported on ENCLU, including those that occur within the
> > + * enclave, are fixed up and reported synchronously instead of being delivered
> > + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> > + * never fixed up and are always delivered via standard signals. On synchrously
> > + * reported exceptions, -EFAULT is returned and details about the exception are
> > + * recorded in @e, the optional sgx_enclave_exception struct.
> > + *
> > + * If an exit handler is provided, the handler will be invoked on synchronous
> > + * exits from the enclave and for all synchronously reported exceptions. In
> > + * latter case, @e is filled prior to invoking the handler.
> > + *
> > + * The exit handler's return value is interpreted as follows:
> > + *  >0:		continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> > + *   0:		success, return @ret to the caller
> > + *  <0:		error, return @ret to the caller
> > + *
> > + * The exit handler may transfer control, e.g. via longjmp() or C++ exception,
> > + * without returning to __vdso_sgx_enter_enclave().
> > + *
> > + * Return:
> > + *  0 on success (ENCLU reached),
> > + *  -EINVAL if ENCLU leaf is not allowed,
> > + *  -errno for all other negative values returned by the userspace exit handler
> > + */
> > +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
> > +					unsigned long rdx, unsigned int leaf,
> > +					unsigned long r8,  unsigned long r9,
> > +					struct sgx_enclave_run *r);
> > +
> >  #endif /* _UAPI_ASM_X86_SGX_H */
> > -- 
> > 2.25.1
> > 
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

Also, I renamed 'r' as 'run' in some places.

End result:

https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h

I'm wondering this sentence:

"The calling convention is custom and does not follow System V x86-64 ABI."

AFAIK, now the vDSO is fully C-callable but waiting for feedback before
removing it.

/Jarkko
Jarkko Sakkinen Sept. 25, 2020, 1:04 a.m. UTC | #3
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote:
> On Thu, Sep 24, 2020 at 08:04:07PM +0200, Borislav Petkov wrote:
> > On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote:
> > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > new file mode 100644
> > > index 000000000000..adbd59d41517
> > > --- /dev/null
> > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > 
> > Why not simply
> > 
> > arch/x86/entry/vdso/sgx.S
> > 
> > ?
> 
> I really like typing?

I fixed bunch of things. For that sentence I need feedback + for any
other possible changes please send a patch if required.

/Jarkko
Borislav Petkov Sept. 25, 2020, 8:14 a.m. UTC | #4
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote:
> > Why not simply
> > 
> > arch/x86/entry/vdso/sgx.S
> > 
> > ?
> 
> I really like typing?

I'll say.

> Yes, to call out that there's a field there, but a field that the vDSO should
> never touch.

You wanna enforce that in the vdso code?

Because if it is there, luserspace will touch it.

> > > +#define SGX_ENCLAVE_RUN_EXCEPTION	4*8
> > > +
> > > +#define SGX_SYNCHRONOUS_EXIT		0
> > > +#define SGX_EXCEPTION_EXIT		1
> > 
> > Those are in ...uapi/asm/sgx.h too. Unify?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

What about this here?

> > Are the CFI annotations for userspace?
> 
> Yes, though that's a 90% confident "yes".

Looks like we wanna support it:

f24f91088427 ("x86/vdso: Define BUILD_VDSO while building and emit .eh_frame in asm")

> Argh, it's actually supposed to be TCS, SGX_ENCLAVE_RUN_TSC is the one that's
> wrong.

Whoopsie :-)

Thx.
Borislav Petkov Sept. 25, 2020, 8:28 a.m. UTC | #5
On Fri, Sep 25, 2020 at 04:00:40AM +0300, Jarkko Sakkinen wrote:
> I renamed it as vsgx.S (for the sake of convention).

Right.

> I have not authored this patch but what I would propose is to use just
> raw value in the place of these constants. It is practially just a
> boolean value.
> 
> I can also add sgx_vdso.h to uapi directory. I just don't see the point.

Just be very cautious what you add to the uapi/ directory because it
becomes API and there's no changing it. That's why I point you guys to
it, to think hard what you expose there and that it becomes contract
with luserspace.

> > I can see why you would write "TCS" though - there's a thread control
> > structure thing too in that patch.
> 
> Renamed.

See Sean's reply.

> /**
>  * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
>  *					__vdso_sgx_enter_enclave()
>  * @rdi:	RDI snapshot
>  * @rsi:	RSI snapshot
>  * @rdx:	RDX snapshot
>  * @rsp:	RSP snapshot (untrusted stack)
>  * @r8:		R8 snapshot
>  * @r9:		R9 snapshot

I'd say here:

"The registers' content is the snapshot made at enclave exit."

> Also, I renamed 'r' as 'run' in some places.
> 
> End result:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h
> 
> I'm wondering this sentence:
> 
> "The calling convention is custom and does not follow System V x86-64 ABI."

Yeah, I was wondering what that meant too.
Jethro Beekman Sept. 25, 2020, 8:39 a.m. UTC | #6
On 2020-09-25 03:00, Jarkko Sakkinen wrote:
> End result:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h
> 
> I'm wondering this sentence:
> 
> "The calling convention is custom and does not follow System V x86-64 ABI."
> 
> AFAIK, now the vDSO is fully C-callable but waiting for feedback before
> removing it.

It's C-callable *iff your enclave follows the System V x86-64 ABI*. In addition, all registers not clobbered by the SGX ISA are passed to the enclave, not just those specified as parameter passing registers in the ABI. This is intentional to make the vDSO function usable in applications that use the current flexibility of ENCLU.

--
Jethro Beekman | Fortanix
Jarkko Sakkinen Sept. 25, 2020, 10:59 a.m. UTC | #7
On Fri, Sep 25, 2020 at 10:14:41AM +0200, Borislav Petkov wrote:
> > > > +#define SGX_ENCLAVE_RUN_EXCEPTION	4*8
> > > > +
> > > > +#define SGX_SYNCHRONOUS_EXIT		0
> > > > +#define SGX_EXCEPTION_EXIT		1
> > > 
> > > Those are in ...uapi/asm/sgx.h too. Unify?
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> What about this here?

Repeating myself, but since there is only 0 and 1, I would just use 0
and 1.

Otherwise, I think I pretty much got these comments sorted out.

/Jarkko
Jarkko Sakkinen Sept. 25, 2020, 11:17 a.m. UTC | #8
On Fri, Sep 25, 2020 at 10:39:58AM +0200, Jethro Beekman wrote:
> On 2020-09-25 03:00, Jarkko Sakkinen wrote:
> > End result:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h
> > 
> > I'm wondering this sentence:
> > 
> > "The calling convention is custom and does not follow System V x86-64 ABI."
> > 
> > AFAIK, now the vDSO is fully C-callable but waiting for feedback before
> > removing it.
> 
> It's C-callable *iff your enclave follows the System V x86-64 ABI*. In
> addition, all registers not clobbered by the SGX ISA are passed to the
> enclave, not just those specified as parameter passing registers in
> the ABI. This is intentional to make the vDSO function usable in
> applications that use the current flexibility of ENCLU.

Hold on, I really want to fix this bit of documentation before sending
any new version, so I'll explain how I understand it.

I think it is just SystemV ABI call with six parameters in the usual
GPRs (rdi, rsi, rdx, rcx, r8, r9).

I'm looking at vdso_sgx_enter_enclave.

What I'm not getting?

> --
> Jethro Beekman | Fortanix

/Jarkko
Jethro Beekman Sept. 25, 2020, 11:43 a.m. UTC | #9
On 2020-09-25 13:17, Jarkko Sakkinen wrote:
> On Fri, Sep 25, 2020 at 10:39:58AM +0200, Jethro Beekman wrote:
>> On 2020-09-25 03:00, Jarkko Sakkinen wrote:
>>> End result:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h
>>>
>>> I'm wondering this sentence:
>>>
>>> "The calling convention is custom and does not follow System V x86-64 ABI."
>>>
>>> AFAIK, now the vDSO is fully C-callable but waiting for feedback before
>>> removing it.
>>
>> It's C-callable *iff your enclave follows the System V x86-64 ABI*. In
>> addition, all registers not clobbered by the SGX ISA are passed to the
>> enclave, not just those specified as parameter passing registers in
>> the ABI. This is intentional to make the vDSO function usable in
>> applications that use the current flexibility of ENCLU.
> 
> Hold on, I really want to fix this bit of documentation before sending
> any new version, so I'll explain how I understand it.
> 
> I think it is just SystemV ABI call with six parameters in the usual
> GPRs (rdi, rsi, rdx, rcx, r8, r9).
> 
> I'm looking at vdso_sgx_enter_enclave.
> 
> What I'm not getting?

This can't be observed from looking at the code in vdso_sgx_enter_enclave, because what makes this "custom" is the absence of code or code in the enclave.

If you call vdso_sgx_enter_enclave() from C but your enclave doesn't respect the ABI (for example, it clobbers callee-saved registers), your code will break. But if you call vdso_sgx_enter_enclave from assembly, you can use enclaves with any ABI as long as your assembly and the enclave agree on that ABI.

Alternatively, when using assembly, I can pass stuff to the enclave in registers besides rdi, rsi, rdx, rcx, r8, r9, just as if I were manually calling ENCLU from assembly.

The vDSO assembly has been carefully written to support both scenarios by only using rax, rbx, rcx, rbp, rsp and passing rdi, rsi, rdx, r8, r9 (and everything else).

> + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
> + * x86-64 ABI, e.g. doesn't handle XSAVE state.  Except for non-volatile
> + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting
> + * state in accordance with the x86-64 ABI is the responsibility of the enclave
> + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C
> + * code without careful consideration by both the enclave and its runtime.
> + *
> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> + * the enclave.  RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.

Perhaps this should be updated to read "All general purpose registers except RAX, RBX, RCX, RBP and RSP ..."

--
Jethro Beekman | Fortanix
Andrew Cooper Sept. 25, 2020, 6:23 p.m. UTC | #10
On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> new file mode 100644
> index 000000000000..adbd59d41517
> --- /dev/null
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -0,0 +1,157 @@
> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> <snip>
> +.Lretpoline:
> +	call	2f
> +1:	pause
> +	lfence
> +	jmp	1b
> +2:	mov	%rax, (%rsp)
> +	ret

I hate to throw further spanners in the work, but this is not compatible
with CET, and the user shadow stack work in progress.

Whichever of these two large series lands first is going to inflict
fixing this problem on the other.

As the vdso text is global (to a first approximation), it must not be a
retpoline if any other process is liable to want to use CET-SS.

If the retpoline really does need to stay, then the vdso probably needs
to gain suitable __x86_indirect_thunk_%reg thunks which are patched at
boot based on the system properties.

~Andrew
Jarkko Sakkinen Sept. 27, 2020, 11:37 p.m. UTC | #11
On Fri, Sep 25, 2020 at 10:28:07AM +0200, Borislav Petkov wrote:
> > > I can see why you would write "TCS" though - there's a thread control
> > > structure thing too in that patch.
> > 
> > Renamed.
> 
> See Sean's reply.

I did not get Sean's reply, and neither can find it from lore:

https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t

> >  * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
> >  *					__vdso_sgx_enter_enclave()
> >  * @rdi:	RDI snapshot
> >  * @rsi:	RSI snapshot
> >  * @rdx:	RDX snapshot
> >  * @rsp:	RSP snapshot (untrusted stack)
> >  * @r8:		R8 snapshot
> >  * @r9:		R9 snapshot
> 
> I'd say here:
> 
> "The registers' content is the snapshot made at enclave exit."

I'd make that a description and take away individual parameter
descriptions. Is that fine?

> > Also, I renamed 'r' as 'run' in some places.
> > 
> > End result:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h
> > 
> > I'm wondering this sentence:
> > 
> > "The calling convention is custom and does not follow System V x86-64 ABI."
> 
> Yeah, I was wondering what that meant too.

I'll refine that one based on my own and Jethro's feedback.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 12:58 a.m. UTC | #12
On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > new file mode 100644
> > index 000000000000..adbd59d41517
> > --- /dev/null
> > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > @@ -0,0 +1,157 @@
> > +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > <snip>
> > +.Lretpoline:
> > +	call	2f
> > +1:	pause
> > +	lfence
> > +	jmp	1b
> > +2:	mov	%rax, (%rsp)
> > +	ret
> 
> I hate to throw further spanners in the work, but this is not compatible
> with CET, and the user shadow stack work in progress.

CET goes beyond my expertise. Can you describe, at least rudimentary,
how this code is not compatible?

I know CET only conceptual level (separate stack holding return
addresses as an measure against return oriented programming (ROP)).

> Whichever of these two large series lands first is going to inflict
> fixing this problem on the other.
> 
> As the vdso text is global (to a first approximation), it must not be a
> retpoline if any other process is liable to want to use CET-SS.

Why is that?

> If the retpoline really does need to stay, then the vdso probably needs
> to gain suitable __x86_indirect_thunk_%reg thunks which are patched at
> boot based on the system properties.
> 
> ~Andrew

aka without CET it is patched? With CET, not?

/Jarkko
Borislav Petkov Sept. 28, 2020, 8:30 a.m. UTC | #13
On Mon, Sep 28, 2020 at 02:37:00AM +0300, Jarkko Sakkinen wrote:
> I did not get Sean's reply, and neither can find it from lore:
> 
> https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t

Yah, your mail server upgrade broke a lot of stuff. And lore even says
it is not there:

2020-09-25 11:43           ` Jethro Beekman
     [not found]     ` <20200925003808.GB20333@linux.intel.com>		<---
2020-09-25  1:04       ` Jarkko Sakkinen

Lemme bounce it to you.

> I'd make that a description and take away individual parameter
> descriptions. Is that fine?

Sure.
Jarkko Sakkinen Sept. 28, 2020, 2:36 p.m. UTC | #14
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote:
> > I can see why you would write "TCS" though - there's a thread control
> > structure thing too in that patch.
> 
> Argh, it's actually supposed to be TCS, SGX_ENCLAVE_RUN_TSC is the one that's
> wrong.

So I presume that I fixed this one already:

https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/entry/vdso/vsgx.S

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 3:02 p.m. UTC | #15
On Mon, Sep 28, 2020 at 10:30:32AM +0200, Borislav Petkov wrote:
> On Mon, Sep 28, 2020 at 02:37:00AM +0300, Jarkko Sakkinen wrote:
> > I did not get Sean's reply, and neither can find it from lore:
> > 
> > https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t
> 
> Yah, your mail server upgrade broke a lot of stuff. And lore even says
> it is not there:
> 
> 2020-09-25 11:43           ` Jethro Beekman
>      [not found]     ` <20200925003808.GB20333@linux.intel.com>		<---
> 2020-09-25  1:04       ` Jarkko Sakkinen
> 
> Lemme bounce it to you.

Thank you. I think I have it correctly in my tree. And I actually
noticed that I had the original email stored in wrong archive folder on
my machine (sorry about that), so did I receive it after all, but it
does not exist in lore.

> > I'd make that a description and take away individual parameter
> > descriptions. Is that fine?
> 
> Sure.

/**
 * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
 *					__vdso_sgx_enter_enclave()
 * @run:	Pointer to the caller provided struct sgx_enclave_run
 *
 * The register parameters contain the snapshot of their values at enclave
 * exit
 *
 * Return:
 *  0 or negative to exit vDSO
 *  positive to re-enter enclave (must be EENTER or ERESUME leaf)
 */
typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
					  long rsp, long r8, long r9,
					  struct sgx_enclave_run *run);

I think this looks reasonable now.

Another minor clean up I made is:

struct sgx_enclave_run {
	__u64 tcs;
	__u32 flags;
	__u32 exit_reason;
	__u64 user_handler;
	__u64 user_data;

I.e. got rid of the "user_handler union. Makes the struc less confusing
looking and is consistent with the other structs.

I've been thinking about this tail:

	union {
		struct sgx_enclave_exception exception;

		/* Pad the entire struct to 256 bytes. */
		__u8 pad[256 - 32];
	};
};

I'd just replace this with

	__u64 exception;
};

And do something like (just writing it to the email to show the idea,
have not even compiled this):

-       mov     %eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx)
-       mov     %di,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx)
-       mov     %si,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx)
-       mov     %rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx)
+       mov     SGX_ENCLAVE_RUN_EXCEPTION(%rbx), %rbx
+
+       mov     %eax, (SGX_EX_LEAF)(%rbx)
+       mov     %di,  (SGX_EX_TRAPNR)(%rbx)
+       mov     %si,  (SGX_EX_ERROR_CODE)(%rbx)
+       mov     %rdx, (SGX_EX_ADDRESS)(%rbx)

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

/Jarkko
Yu-cheng Yu Sept. 28, 2020, 3:43 p.m. UTC | #16
On 9/25/2020 11:23 AM, Andrew Cooper wrote:
> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>> new file mode 100644
>> index 000000000000..adbd59d41517
>> --- /dev/null
>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>> @@ -0,0 +1,157 @@
>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
>> <snip>
>> +.Lretpoline:
>> +	call	2f
>> +1:	pause
>> +	lfence
>> +	jmp	1b
>> +2:	mov	%rax, (%rsp)
>> +	ret
> 
> I hate to throw further spanners in the work, but this is not compatible
> with CET, and the user shadow stack work in progress.

Hi Jarkko,

These 1: and 2: targets are reached only from these few lines?  If they 
are direct call/jmp targets, I think it is OK in terms of CET.  If they 
are reached from an instruction like "jmp *%rax", then we need to put in 
an "endbr64".

Yu-cheng

> 
> Whichever of these two large series lands first is going to inflict
> fixing this problem on the other.
> 
> As the vdso text is global (to a first approximation), it must not be a
> retpoline if any other process is liable to want to use CET-SS.
> 
> If the retpoline really does need to stay, then the vdso probably needs
> to gain suitable __x86_indirect_thunk_%reg thunks which are patched at
> boot based on the system properties.
> 
> ~Andrew
>
H.J. Lu Sept. 28, 2020, 3:54 p.m. UTC | #17
On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote:
>
> On 9/25/2020 11:23 AM, Andrew Cooper wrote:
> > On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> new file mode 100644
> >> index 000000000000..adbd59d41517
> >> --- /dev/null
> >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> @@ -0,0 +1,157 @@
> >> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >> <snip>
> >> +.Lretpoline:
> >> +    call    2f
> >> +1:  pause
> >> +    lfence
> >> +    jmp     1b
> >> +2:  mov     %rax, (%rsp)
> >> +    ret
> >
> > I hate to throw further spanners in the work, but this is not compatible
> > with CET, and the user shadow stack work in progress.
>
> Hi Jarkko,
>
> These 1: and 2: targets are reached only from these few lines?  If they
> are direct call/jmp targets, I think it is OK in terms of CET.  If they
> are reached from an instruction like "jmp *%rax", then we need to put in
> an "endbr64".
>

This also isn't compatible with shadow stack.
Yu-cheng Yu Sept. 28, 2020, 4:40 p.m. UTC | #18
On 9/28/2020 8:54 AM, H.J. Lu wrote:
> On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote:
>>
>> On 9/25/2020 11:23 AM, Andrew Cooper wrote:
>>> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
>>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>>> new file mode 100644
>>>> index 000000000000..adbd59d41517
>>>> --- /dev/null
>>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>>> @@ -0,0 +1,157 @@
>>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>>> <snip>
>>>> +.Lretpoline:
>>>> +    call    2f
>>>> +1:  pause
>>>> +    lfence
>>>> +    jmp     1b
>>>> +2:  mov     %rax, (%rsp)
>>>> +    ret
>>>
>>> I hate to throw further spanners in the work, but this is not compatible
>>> with CET, and the user shadow stack work in progress.
>>
>> Hi Jarkko,
>>
>> These 1: and 2: targets are reached only from these few lines?  If they
>> are direct call/jmp targets, I think it is OK in terms of CET.  If they
>> are reached from an instruction like "jmp *%rax", then we need to put in
>> an "endbr64".
>>
> 
> This also isn't compatible with shadow stack.
> 

Then, when shadow stack is enabled, disable this?

Yu-cheng
Andrew Cooper Sept. 28, 2020, 4:44 p.m. UTC | #19
On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
>> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> new file mode 100644
>>> index 000000000000..adbd59d41517
>>> --- /dev/null
>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> @@ -0,0 +1,157 @@
>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>> <snip>
>>> +.Lretpoline:
>>> +	call	2f
>>> +1:	pause
>>> +	lfence
>>> +	jmp	1b
>>> +2:	mov	%rax, (%rsp)
>>> +	ret
>> I hate to throw further spanners in the work, but this is not compatible
>> with CET, and the user shadow stack work in progress.
> CET goes beyond my expertise. Can you describe, at least rudimentary,
> how this code is not compatible?

CET Shadow Stacks detect attacks which modify the return address on the
stack.

Retpoline *is* a ROP gadget.  It really does modify the return address
on the stack, even if its purpose is defensive (vs Spectre v2) rather
than malicious.

>> Whichever of these two large series lands first is going to inflict
>> fixing this problem on the other.
>>
>> As the vdso text is global (to a first approximation), it must not be a
>> retpoline if any other process is liable to want to use CET-SS.
> Why is that?

Because when CET-SS is enabled, the ret will suffer a #CP exception
(return address on the stack not matching the one recorded in the shadow
stack), which I presume/hope is wired into SIGSEGV.

~Andrew
H.J. Lu Sept. 28, 2020, 6:07 p.m. UTC | #20
On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> new file mode 100644
> >>> index 000000000000..adbd59d41517
> >>> --- /dev/null
> >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> @@ -0,0 +1,157 @@
> >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>> <snip>
> >>> +.Lretpoline:
> >>> +   call    2f
> >>> +1: pause
> >>> +   lfence
> >>> +   jmp     1b
> >>> +2: mov     %rax, (%rsp)
> >>> +   ret
> >> I hate to throw further spanners in the work, but this is not compatible
> >> with CET, and the user shadow stack work in progress.
> > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > how this code is not compatible?
>
> CET Shadow Stacks detect attacks which modify the return address on the
> stack.
>
> Retpoline *is* a ROP gadget.  It really does modify the return address
> on the stack, even if its purpose is defensive (vs Spectre v2) rather
> than malicious.
>
> >> Whichever of these two large series lands first is going to inflict
> >> fixing this problem on the other.
> >>
> >> As the vdso text is global (to a first approximation), it must not be a
> >> retpoline if any other process is liable to want to use CET-SS.
> > Why is that?
>
> Because when CET-SS is enabled, the ret will suffer a #CP exception
> (return address on the stack not matching the one recorded in the shadow
> stack), which I presume/hope is wired into SIGSEGV.
>

Here is the CET compatible retpoline:

endbr64
/* Check if shadow stack is in use.  NB: R11 is the only usable
   scratch register for function calls.  */
xorl %r11d, %r11d
rdsspq %r11
testq %r11, %r11
jnz 3f
call 2f
1:
pause
lfence
jmp 1b
2:
mov %rax, (%rsp)
ret
3:
/* Shadow stack is in use.  Make the indirect call.  */
call *%rax
ret
Andy Lutomirski Sept. 28, 2020, 6:12 p.m. UTC | #21
On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> >
> > On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >>> new file mode 100644
> > >>> index 000000000000..adbd59d41517
> > >>> --- /dev/null
> > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >>> @@ -0,0 +1,157 @@
> > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > >>> <snip>
> > >>> +.Lretpoline:
> > >>> +   call    2f
> > >>> +1: pause
> > >>> +   lfence
> > >>> +   jmp     1b
> > >>> +2: mov     %rax, (%rsp)
> > >>> +   ret
> > >> I hate to throw further spanners in the work, but this is not compatible
> > >> with CET, and the user shadow stack work in progress.
> > > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > > how this code is not compatible?
> >
> > CET Shadow Stacks detect attacks which modify the return address on the
> > stack.
> >
> > Retpoline *is* a ROP gadget.  It really does modify the return address
> > on the stack, even if its purpose is defensive (vs Spectre v2) rather
> > than malicious.
> >
> > >> Whichever of these two large series lands first is going to inflict
> > >> fixing this problem on the other.
> > >>
> > >> As the vdso text is global (to a first approximation), it must not be a
> > >> retpoline if any other process is liable to want to use CET-SS.
> > > Why is that?
> >
> > Because when CET-SS is enabled, the ret will suffer a #CP exception
> > (return address on the stack not matching the one recorded in the shadow
> > stack), which I presume/hope is wired into SIGSEGV.
> >
>
> Here is the CET compatible retpoline:
>
> endbr64
> /* Check if shadow stack is in use.  NB: R11 is the only usable
>    scratch register for function calls.  */
> xorl %r11d, %r11d
> rdsspq %r11
> testq %r11, %r11
> jnz 3f
> call 2f
> 1:
> pause
> lfence
> jmp 1b
> 2:
> mov %rax, (%rsp)
> ret
> 3:
> /* Shadow stack is in use.  Make the indirect call.  */
> call *%rax
> ret

What do we expect user programs to do on CET systems?  It would be
nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK.

--Andy
Dave Hansen Sept. 28, 2020, 6:17 p.m. UTC | #22
On 9/28/20 11:12 AM, Andy Lutomirski wrote:
>> endbr64
>> /* Check if shadow stack is in use.  NB: R11 is the only usable
>>    scratch register for function calls.  */
>> xorl %r11d, %r11d
>> rdsspq %r11
>> testq %r11, %r11
>> jnz 3f
>> call 2f
>> 1:
>> pause
>> lfence
>> jmp 1b
>> 2:
>> mov %rax, (%rsp)
>> ret
>> 3:
>> /* Shadow stack is in use.  Make the indirect call.  */
>> call *%rax
>> ret
> What do we expect user programs to do on CET systems?  It would be
> nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK.

Shouldn't we just be able to use X86_FEATURE_RETPOLINE?

We probably need a mechanism to force X86_FEATURE_SHSTK and
X86_FEATURE_RETPOLINE to be mutually exclusive if we don't have one already.
Jarkko Sakkinen Sept. 28, 2020, 8:42 p.m. UTC | #23
On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote:
> On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> new file mode 100644
> >>> index 000000000000..adbd59d41517
> >>> --- /dev/null
> >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> @@ -0,0 +1,157 @@
> >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>> <snip>
> >>> +.Lretpoline:
> >>> +	call	2f
> >>> +1:	pause
> >>> +	lfence
> >>> +	jmp	1b
> >>> +2:	mov	%rax, (%rsp)
> >>> +	ret
> >> I hate to throw further spanners in the work, but this is not compatible
> >> with CET, and the user shadow stack work in progress.
> > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > how this code is not compatible?
> 
> CET Shadow Stacks detect attacks which modify the return address on the
> stack.
> 
> Retpoline *is* a ROP gadget.  It really does modify the return address
> on the stack, even if its purpose is defensive (vs Spectre v2) rather
> than malicious.

Aah. I get that, yes.

Kernel is full of retpoline but I presume that ring-0 does not use CET.

The situation with callback is follows: for a run-time the user_handler
by all practical means is always the same. There is ever only one user
handler that gets executed. I.e. the indirect callback will always lead
to the same thing. I wonder how much assets an adversary would get if
we just remove retpoline bits (not much thinking done yet on that).

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 8:56 p.m. UTC | #24
On Mon, Sep 28, 2020 at 08:43:16AM -0700, Yu, Yu-cheng wrote:
> On 9/25/2020 11:23 AM, Andrew Cooper wrote:
> > On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > new file mode 100644
> > > index 000000000000..adbd59d41517
> > > --- /dev/null
> > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > @@ -0,0 +1,157 @@
> > > +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > > <snip>
> > > +.Lretpoline:
> > > +	call	2f
> > > +1:	pause
> > > +	lfence
> > > +	jmp	1b
> > > +2:	mov	%rax, (%rsp)
> > > +	ret
> > 
> > I hate to throw further spanners in the work, but this is not compatible
> > with CET, and the user shadow stack work in progress.
> 
> Hi Jarkko,
> 
> These 1: and 2: targets are reached only from these few lines?  If they are
> direct call/jmp targets, I think it is OK in terms of CET.  If they are
> reached from an instruction like "jmp *%rax", then we need to put in an
> "endbr64".
> 
> Yu-cheng

The invocation is over here:

	/* Load the callback pointer to %rax and invoke it via retpoline. */
	mov	SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax
	call	.Lretpoline

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 9:36 p.m. UTC | #25
On Mon, Sep 28, 2020 at 08:54:01AM -0700, H.J. Lu wrote:
> On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote:
> >
> > On 9/25/2020 11:23 AM, Andrew Cooper wrote:
> > > On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >> new file mode 100644
> > >> index 000000000000..adbd59d41517
> > >> --- /dev/null
> > >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >> @@ -0,0 +1,157 @@
> > >> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > >> <snip>
> > >> +.Lretpoline:
> > >> +    call    2f
> > >> +1:  pause
> > >> +    lfence
> > >> +    jmp     1b
> > >> +2:  mov     %rax, (%rsp)
> > >> +    ret
> > >
> > > I hate to throw further spanners in the work, but this is not compatible
> > > with CET, and the user shadow stack work in progress.
> >
> > Hi Jarkko,
> >
> > These 1: and 2: targets are reached only from these few lines?  If they
> > are direct call/jmp targets, I think it is OK in terms of CET.  If they
> > are reached from an instruction like "jmp *%rax", then we need to put in
> > an "endbr64".
> >
> 
> This also isn't compatible with shadow stack.
> 
> -- 
> H.J.

I have the now full picture of the problem thanks to Andrew's response
[1]. And Dave Hansen just explained me in detail the context and
background with [2]. So I'd guess this will get sorted out soon.

If you don't mind I'll CC you to this commit when I send the next
version?

[1] https://lkml.org/lkml/2020/9/28/1153
[2] https://lkml.org/lkml/2020/9/25/1122

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 9:41 p.m. UTC | #26
On Mon, Sep 28, 2020 at 11:07:47AM -0700, H.J. Lu wrote:
> On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> >
> > On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >>> new file mode 100644
> > >>> index 000000000000..adbd59d41517
> > >>> --- /dev/null
> > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > >>> @@ -0,0 +1,157 @@
> > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > >>> <snip>
> > >>> +.Lretpoline:
> > >>> +   call    2f
> > >>> +1: pause
> > >>> +   lfence
> > >>> +   jmp     1b
> > >>> +2: mov     %rax, (%rsp)
> > >>> +   ret
> > >> I hate to throw further spanners in the work, but this is not compatible
> > >> with CET, and the user shadow stack work in progress.
> > > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > > how this code is not compatible?
> >
> > CET Shadow Stacks detect attacks which modify the return address on the
> > stack.
> >
> > Retpoline *is* a ROP gadget.  It really does modify the return address
> > on the stack, even if its purpose is defensive (vs Spectre v2) rather
> > than malicious.
> >
> > >> Whichever of these two large series lands first is going to inflict
> > >> fixing this problem on the other.
> > >>
> > >> As the vdso text is global (to a first approximation), it must not be a
> > >> retpoline if any other process is liable to want to use CET-SS.
> > > Why is that?
> >
> > Because when CET-SS is enabled, the ret will suffer a #CP exception
> > (return address on the stack not matching the one recorded in the shadow
> > stack), which I presume/hope is wired into SIGSEGV.
> >
> 
> Here is the CET compatible retpoline:
> 
> endbr64
> /* Check if shadow stack is in use.  NB: R11 is the only usable
>    scratch register for function calls.  */
> xorl %r11d, %r11d
> rdsspq %r11
> testq %r11, %r11
> jnz 3f
> call 2f
> 1:
> pause
> lfence
> jmp 1b
> 2:
> mov %rax, (%rsp)
> ret
> 3:
> /* Shadow stack is in use.  Make the indirect call.  */
> call *%rax
> ret

Right, so I have actually two alternatives: this and boot time patching:

https://lkml.org/lkml/2020/9/25/1122

/Jarkko
Jarkko Sakkinen Sept. 28, 2020, 9:56 p.m. UTC | #27
On Mon, Sep 28, 2020 at 11:12:08AM -0700, Andy Lutomirski wrote:
> On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> > >
> > > On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > >>> new file mode 100644
> > > >>> index 000000000000..adbd59d41517
> > > >>> --- /dev/null
> > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > >>> @@ -0,0 +1,157 @@
> > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > > >>> <snip>
> > > >>> +.Lretpoline:
> > > >>> +   call    2f
> > > >>> +1: pause
> > > >>> +   lfence
> > > >>> +   jmp     1b
> > > >>> +2: mov     %rax, (%rsp)
> > > >>> +   ret
> > > >> I hate to throw further spanners in the work, but this is not compatible
> > > >> with CET, and the user shadow stack work in progress.
> > > > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > > > how this code is not compatible?
> > >
> > > CET Shadow Stacks detect attacks which modify the return address on the
> > > stack.
> > >
> > > Retpoline *is* a ROP gadget.  It really does modify the return address
> > > on the stack, even if its purpose is defensive (vs Spectre v2) rather
> > > than malicious.
> > >
> > > >> Whichever of these two large series lands first is going to inflict
> > > >> fixing this problem on the other.
> > > >>
> > > >> As the vdso text is global (to a first approximation), it must not be a
> > > >> retpoline if any other process is liable to want to use CET-SS.
> > > > Why is that?
> > >
> > > Because when CET-SS is enabled, the ret will suffer a #CP exception
> > > (return address on the stack not matching the one recorded in the shadow
> > > stack), which I presume/hope is wired into SIGSEGV.
> > >
> >
> > Here is the CET compatible retpoline:
> >
> > endbr64
> > /* Check if shadow stack is in use.  NB: R11 is the only usable
> >    scratch register for function calls.  */
> > xorl %r11d, %r11d
> > rdsspq %r11
> > testq %r11, %r11
> > jnz 3f
> > call 2f
> > 1:
> > pause
> > lfence
> > jmp 1b
> > 2:
> > mov %rax, (%rsp)
> > ret
> > 3:
> > /* Shadow stack is in use.  Make the indirect call.  */
> > call *%rax
> > ret
> 
> What do we expect user programs to do on CET systems?  It would be
> nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK.
> 
> --Andy

I'm open to do either solution. My thinking was to initially do things
vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the
above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed
on details how that thing works and it should be perfectly usable for
our use case.

/Jarkko
H.J. Lu Sept. 28, 2020, 10:06 p.m. UTC | #28
On Mon, Sep 28, 2020 at 2:56 PM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> On Mon, Sep 28, 2020 at 11:12:08AM -0700, Andy Lutomirski wrote:
> > On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> > > >
> > > > On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> > > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> > > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> > > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > > >>> new file mode 100644
> > > > >>> index 000000000000..adbd59d41517
> > > > >>> --- /dev/null
> > > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > > >>> @@ -0,0 +1,157 @@
> > > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > > > >>> <snip>
> > > > >>> +.Lretpoline:
> > > > >>> +   call    2f
> > > > >>> +1: pause
> > > > >>> +   lfence
> > > > >>> +   jmp     1b
> > > > >>> +2: mov     %rax, (%rsp)
> > > > >>> +   ret
> > > > >> I hate to throw further spanners in the work, but this is not compatible
> > > > >> with CET, and the user shadow stack work in progress.
> > > > > CET goes beyond my expertise. Can you describe, at least rudimentary,
> > > > > how this code is not compatible?
> > > >
> > > > CET Shadow Stacks detect attacks which modify the return address on the
> > > > stack.
> > > >
> > > > Retpoline *is* a ROP gadget.  It really does modify the return address
> > > > on the stack, even if its purpose is defensive (vs Spectre v2) rather
> > > > than malicious.
> > > >
> > > > >> Whichever of these two large series lands first is going to inflict
> > > > >> fixing this problem on the other.
> > > > >>
> > > > >> As the vdso text is global (to a first approximation), it must not be a
> > > > >> retpoline if any other process is liable to want to use CET-SS.
> > > > > Why is that?
> > > >
> > > > Because when CET-SS is enabled, the ret will suffer a #CP exception
> > > > (return address on the stack not matching the one recorded in the shadow
> > > > stack), which I presume/hope is wired into SIGSEGV.
> > > >
> > >
> > > Here is the CET compatible retpoline:
> > >
> > > endbr64
> > > /* Check if shadow stack is in use.  NB: R11 is the only usable
> > >    scratch register for function calls.  */
> > > xorl %r11d, %r11d
> > > rdsspq %r11
> > > testq %r11, %r11
> > > jnz 3f
> > > call 2f
> > > 1:
> > > pause
> > > lfence
> > > jmp 1b
> > > 2:
> > > mov %rax, (%rsp)
> > > ret
> > > 3:
> > > /* Shadow stack is in use.  Make the indirect call.  */
> > > call *%rax
> > > ret
> >
> > What do we expect user programs to do on CET systems?  It would be
> > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK.
> >
> > --Andy
>
> I'm open to do either solution. My thinking was to initially do things
> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the
> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed
> on details how that thing works and it should be perfectly usable for
> our use case.
>

Since SHSTK and IBT are enabled per process, not the whole machine,
are you going to patch vDSO on a per-process basis?
Jarkko Sakkinen Sept. 28, 2020, 10:07 p.m. UTC | #29
On Mon, Sep 28, 2020 at 11:17:42AM -0700, Dave Hansen wrote:
> On 9/28/20 11:12 AM, Andy Lutomirski wrote:
> >> endbr64
> >> /* Check if shadow stack is in use.  NB: R11 is the only usable
> >>    scratch register for function calls.  */
> >> xorl %r11d, %r11d
> >> rdsspq %r11
> >> testq %r11, %r11
> >> jnz 3f
> >> call 2f
> >> 1:
> >> pause
> >> lfence
> >> jmp 1b
> >> 2:
> >> mov %rax, (%rsp)
> >> ret
> >> 3:
> >> /* Shadow stack is in use.  Make the indirect call.  */
> >> call *%rax
> >> ret
> > What do we expect user programs to do on CET systems?  It would be
> > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK.
> 
> Shouldn't we just be able to use X86_FEATURE_RETPOLINE?
> 
> We probably need a mechanism to force X86_FEATURE_SHSTK and
> X86_FEATURE_RETPOLINE to be mutually exclusive if we don't have one already.

First of all: lets go with boot time patching instead of dynamic
detection. It's both easier to implement and by all other merits makes a
lot more sense. It was just a thing that I've not used before.

That sorted out, does it matter which direction I look it at? I could
use either feature flag as basis (and I do not have a personal
preference here).

/Jarkko
Dave Hansen Sept. 28, 2020, 10:18 p.m. UTC | #30
On 9/28/20 3:06 PM, H.J. Lu wrote:
>> I'm open to do either solution. My thinking was to initially do things
>> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the
>> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed
>> on details how that thing works and it should be perfectly usable for
>> our use case.
>>
> Since SHSTK and IBT are enabled per process, not the whole machine,
> are you going to patch vDSO on a per-process basis?

No.

Retpolines mitigate Spectre v2 attacks.  If you're not vulnerable to
Spectre v2, you don't need retpolines.

All processors which support CET *also* have hardware mitigations
against Spectre v2 and don't need retpolines.  Here's all of the
possibilities:

CET=y, BUG_SPECTRE_V2=y: does not exist
CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline
CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
Andy Lutomirski Sept. 28, 2020, 10:41 p.m. UTC | #31
On Mon, Sep 28, 2020 at 3:18 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 9/28/20 3:06 PM, H.J. Lu wrote:
> >> I'm open to do either solution. My thinking was to initially do things
> >> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the
> >> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed
> >> on details how that thing works and it should be perfectly usable for
> >> our use case.
> >>
> > Since SHSTK and IBT are enabled per process, not the whole machine,
> > are you going to patch vDSO on a per-process basis?
>
> No.
>
> Retpolines mitigate Spectre v2 attacks.  If you're not vulnerable to
> Spectre v2, you don't need retpolines.
>
> All processors which support CET *also* have hardware mitigations
> against Spectre v2 and don't need retpolines.  Here's all of the
> possibilities:
>
> CET=y, BUG_SPECTRE_V2=y: does not exist
> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline
> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable

Just to confirm: does this mean that the CPU mitigates against user
code mistraining the branch predictors for CPL0?  Because this is the
vDSO, and the situation we're actually concerned about is user code
mistraining its own branch predictors.  This could happen
cross-process or within the same process.
Andrew Cooper Sept. 28, 2020, 11:38 p.m. UTC | #32
On 28/09/2020 23:41, Andy Lutomirski wrote:
> On Mon, Sep 28, 2020 at 3:18 PM Dave Hansen <dave.hansen@intel.com> wrote:
>> On 9/28/20 3:06 PM, H.J. Lu wrote:
>>>> I'm open to do either solution. My thinking was to initially do things
>>>> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the
>>>> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed
>>>> on details how that thing works and it should be perfectly usable for
>>>> our use case.
>>>>
>>> Since SHSTK and IBT are enabled per process, not the whole machine,
>>> are you going to patch vDSO on a per-process basis?
>> No.
>>
>> Retpolines mitigate Spectre v2 attacks.  If you're not vulnerable to
>> Spectre v2, you don't need retpolines.
>>
>> All processors which support CET *also* have hardware mitigations
>> against Spectre v2 and don't need retpolines.  Here's all of the
>> possibilities:
>>
>> CET=y, BUG_SPECTRE_V2=y: does not exist
>> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline
>> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
>> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
> Just to confirm: does this mean that the CPU mitigates against user
> code mistraining the branch predictors for CPL0?

If (and only if) you have eIBRS enabled.

eIBRS should be available on all CET-capable hardware, and Linux ought
to use it by default.

> Because this is the
> vDSO, and the situation we're actually concerned about is user code
> mistraining its own branch predictors.  This could happen
> cross-process or within the same process.

There is nothing (in Intel parts) which prevents mode same-mode training
of indirect branches, either in user or kernel space.

However, an IBPB on context switch should prevent cross-process trailing
attacks.

~Andrew
Andrew Cooper Sept. 28, 2020, 11:52 p.m. UTC | #33
On 28/09/2020 21:42, Jarkko Sakkinen wrote:
> On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote:
>> On 28/09/2020 01:58, Jarkko Sakkinen wrote:
>>> On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
>>>> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
>>>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>>>> new file mode 100644
>>>>> index 000000000000..adbd59d41517
>>>>> --- /dev/null
>>>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>>>> @@ -0,0 +1,157 @@
>>>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>>>> <snip>
>>>>> +.Lretpoline:
>>>>> +	call	2f
>>>>> +1:	pause
>>>>> +	lfence
>>>>> +	jmp	1b
>>>>> +2:	mov	%rax, (%rsp)
>>>>> +	ret
>>>> I hate to throw further spanners in the work, but this is not compatible
>>>> with CET, and the user shadow stack work in progress.
>>> CET goes beyond my expertise. Can you describe, at least rudimentary,
>>> how this code is not compatible?
>> CET Shadow Stacks detect attacks which modify the return address on the
>> stack.
>>
>> Retpoline *is* a ROP gadget.  It really does modify the return address
>> on the stack, even if its purpose is defensive (vs Spectre v2) rather
>> than malicious.
> Aah. I get that, yes.
>
> Kernel is full of retpoline but I presume that ring-0 does not use CET.

No-one has implemented support CET-SS support for Linux itself yet, but
other kernels do have it working.

~Andrew
Dave Hansen Sept. 29, 2020, 2:10 p.m. UTC | #34
On 9/28/20 4:38 PM, Andrew Cooper wrote:
>>> CET=y, BUG_SPECTRE_V2=y: does not exist
>>> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline
>>> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
>>> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
>> Just to confirm: does this mean that the CPU mitigates against user
>> code mistraining the branch predictors for CPL0?
> If (and only if) you have eIBRS enabled.
> 
> eIBRS should be available on all CET-capable hardware, and Linux ought
> to use it by default.

You're totally right, of course.  I was (wrongly) thinking about this
VDSO retpoline as kernel code.

There's another wrinkle here.  Let's say we're vulnerable to a
Spectre-v2-style attack and we want to mitigate it on CET hardware that
has enhanced IBRS.  I'm not sure how reliable of a mitigation retpolines
are on enhanced IBRS hardware.  Intel has recommended _against_ using
them in some cases:

> https://software.intel.com/security-software-guidance/api-app/sites/default/files/Retpoline-A-Branch-Target-Injection-Mitigation.pdf

"On processors that support enhanced IBRS, it should be used for
mitigation instead of retpoline."

I actually authored that bit of the whitepaper, and I recall that this
was not simply a recommendation based on performance advantages of using
enhanced IBRS.  I can dig through some old email if we decide that we
want to explore using a retpoline on enhanced IBRS hardware.

But, let's take a step back.  The changelog for this patch needs to at
least have:

1. What is the attack being mitigated by the retpoline?
2. Do we actually want to mitigate it?
3. What options are there to mitigate it?
4. Which option does this patch use and why?

Right now, there's not even a comment about this.
Andrew Cooper Sept. 29, 2020, 3:01 p.m. UTC | #35
On 29/09/2020 15:10, Dave Hansen wrote:
> On 9/28/20 4:38 PM, Andrew Cooper wrote:
>>>> CET=y, BUG_SPECTRE_V2=y: does not exist
>>>> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline
>>>> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
>>>> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
>>> Just to confirm: does this mean that the CPU mitigates against user
>>> code mistraining the branch predictors for CPL0?
>> If (and only if) you have eIBRS enabled.
>>
>> eIBRS should be available on all CET-capable hardware, and Linux ought
>> to use it by default.
> You're totally right, of course.  I was (wrongly) thinking about this
> VDSO retpoline as kernel code.
>
> There's another wrinkle here.  Let's say we're vulnerable to a
> Spectre-v2-style attack and we want to mitigate it on CET hardware that
> has enhanced IBRS.  I'm not sure how reliable of a mitigation retpolines
> are on enhanced IBRS hardware.  Intel has recommended _against_ using
> them in some cases:
>
>> https://software.intel.com/security-software-guidance/api-app/sites/default/files/Retpoline-A-Branch-Target-Injection-Mitigation.pdf
> "On processors that support enhanced IBRS, it should be used for
> mitigation instead of retpoline."
> I actually authored that bit of the whitepaper, and I recall that this
> was not simply a recommendation based on performance advantages of using
> enhanced IBRS.  I can dig through some old email if we decide that we
> want to explore using a retpoline on enhanced IBRS hardware.

If only life were simple.

In light of https://arxiv.org/abs/2008.02307 which managed to
demonstrate that the original KAISER was actually a speculative attack
and nothing to do with the prefetch instruction, a discussion about
same-mode training happened.

The updated recommendation given was to continue using retpoline as well
as eIBRS to prevent same-mode training of the syscall indirect branch. 
Josh (CC'd) has been doing a lot of work to find and fix other
speculative leaks in this area.

For Skylake uarch and later, even if an RSB underflow leads to a BTB
lookup, it still requires an interrupt/NMI to hit one of two instruction
boundaries to empty the RSB, and an attacker with that level of control
probably has more interesting things to be trying to do.

Without retpoline (or something even more expensive such as IRET-ing
around), an attacker can still create speculative type confusion between
different system calls, when eIBRS is active.

Once you mix CET-SS in, this breaks, unless you're prepared to update
the retpoline gadget to include a WRSS to modify the shadow stack
alongside the regular stack.  Add this to the large pile of fun for
whomever has the privileg^W chore of implementing supervisor CET support.

>
> But, let's take a step back.  The changelog for this patch needs to at
> least have:
>
> 1. What is the attack being mitigated by the retpoline?
> 2. Do we actually want to mitigate it?
> 3. What options are there to mitigate it?
> 4. Which option does this patch use and why?
>
> Right now, there's not even a comment about this.

I agree.  The reason for using a retpoline here in the first place is
unclear.

~Andrew
Jarkko Sakkinen Sept. 30, 2020, 12:52 a.m. UTC | #36
On Tue, Sep 29, 2020 at 12:52:35AM +0100, Andrew Cooper wrote:
> On 28/09/2020 21:42, Jarkko Sakkinen wrote:
> > On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote:
> >> On 28/09/2020 01:58, Jarkko Sakkinen wrote:
> >>> On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote:
> >>>> On 15/09/2020 12:28, Jarkko Sakkinen wrote:
> >>>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>>> new file mode 100644
> >>>>> index 000000000000..adbd59d41517
> >>>>> --- /dev/null
> >>>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>>> @@ -0,0 +1,157 @@
> >>>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>>>> <snip>
> >>>>> +.Lretpoline:
> >>>>> +	call	2f
> >>>>> +1:	pause
> >>>>> +	lfence
> >>>>> +	jmp	1b
> >>>>> +2:	mov	%rax, (%rsp)
> >>>>> +	ret
> >>>> I hate to throw further spanners in the work, but this is not compatible
> >>>> with CET, and the user shadow stack work in progress.
> >>> CET goes beyond my expertise. Can you describe, at least rudimentary,
> >>> how this code is not compatible?
> >> CET Shadow Stacks detect attacks which modify the return address on the
> >> stack.
> >>
> >> Retpoline *is* a ROP gadget.  It really does modify the return address
> >> on the stack, even if its purpose is defensive (vs Spectre v2) rather
> >> than malicious.
> > Aah. I get that, yes.
> >
> > Kernel is full of retpoline but I presume that ring-0 does not use CET.
> 
> No-one has implemented support CET-SS support for Linux itself yet, but
> other kernels do have it working.
> 
> ~Andrew

Haitao, can you point out the user handler callback in the Intel
SGX runtime?

There is only one single global callback in a practical deployment
provided by the runtime. AFAIK, it just copies values, does not do any
rountrips and is generally very trivial peace of code but it is better
to check it before final say.

I've now experimented with ALTERNATIVE() and it can be definitely made
work. I'm just thinking that would it be best not use retpoline at all?

My guess is that the callback would not have much applicability in
Spectre'ish attacks but do not have enough expertise on that to even
semiformally conclude it.

My intention is to find reasonable conclusions to drop it instead of
adding more complexity to the vDSO.

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 3f183d0b8826..416f9432269d 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -29,6 +29,7 @@  VDSO32-$(CONFIG_IA32_EMULATION)	:= y
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
+vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
 
 # files to link into kernel
 obj-y				+= vma.o extable.o
@@ -100,6 +101,7 @@  $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 36b644e16272..4bf48462fca7 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,6 +27,7 @@  VERSION {
 		__vdso_time;
 		clock_getres;
 		__vdso_clock_getres;
+		__vdso_sgx_enter_enclave;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
new file mode 100644
index 000000000000..adbd59d41517
--- /dev/null
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -0,0 +1,157 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/errno.h>
+#include <asm/enclu.h>
+
+#include "extable.h"
+
+/* Offset of 'struct sgx_enclave_run' relative to %rbp. */
+#define SGX_ENCLAVE_RUN_PTR	2*8
+
+/* Offsets into 'struct sgx_enclave_run'. */
+#define SGX_ENCLAVE_RUN_TSC		0*8
+#define SGX_ENCLAVE_RUN_FLAGS		1*8
+#define SGX_ENCLAVE_RUN_EXIT_REASON	1*8 + 4
+#define SGX_ENCLAVE_RUN_USER_HANDLER	2*8
+/* #define SGX_ENCLAVE_RUN_USER_DATA	3*8 */
+#define SGX_ENCLAVE_RUN_EXCEPTION	4*8
+
+#define SGX_SYNCHRONOUS_EXIT		0
+#define SGX_EXCEPTION_EXIT		1
+
+/* Offsets into sgx_enter_enclave.exception. */
+#define SGX_EX_LEAF			0*8
+#define SGX_EX_TRAPNR			0*8+4
+#define SGX_EX_ERROR_CODE		0*8+6
+#define SGX_EX_ADDRESS			1*8
+
+.code64
+.section .text, "ax"
+
+SYM_FUNC_START(__vdso_sgx_enter_enclave)
+	/* Prolog */
+	.cfi_startproc
+	push	%rbp
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%rbp, 0
+	mov	%rsp, %rbp
+	.cfi_def_cfa_register	%rbp
+	push	%rbx
+	.cfi_rel_offset		%rbx, -8
+
+	mov	%ecx, %eax
+.Lenter_enclave:
+	/* EENTER <= leaf <= ERESUME */
+	cmp	$EENTER, %eax
+	jb	.Linvalid_input
+	cmp	$ERESUME, %eax
+	ja	.Linvalid_input
+
+	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rcx
+
+	/* No flags are currently defined/supported. */
+	cmpl	$0, SGX_ENCLAVE_RUN_FLAGS(%rcx)
+	jne	.Linvalid_input
+
+	/* Load TCS and AEP */
+	mov	SGX_ENCLAVE_RUN_TSC(%rcx), %rbx
+	lea	.Lasync_exit_pointer(%rip), %rcx
+
+	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
+.Lasync_exit_pointer:
+.Lenclu_eenter_eresume:
+	enclu
+
+	/* EEXIT jumps here unless the enclave is doing something fancy. */
+	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
+
+	/* Set exit_reason. */
+	movl	$SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
+
+	/* Invoke userspace's exit handler if one was provided. */
+.Lhandle_exit:
+	cmpq	$0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx)
+	jne	.Linvoke_userspace_handler
+
+	/* Success, in the sense that ENCLU was attempted. */
+	xor	%eax, %eax
+
+.Lout:
+	pop	%rbx
+	leave
+	.cfi_def_cfa		%rsp, 8
+	ret
+
+	/* The out-of-line code runs with the pre-leave stack frame. */
+	.cfi_def_cfa		%rbp, 16
+
+.Linvalid_input:
+	mov	$(-EINVAL), %eax
+	jmp	.Lout
+
+.Lhandle_exception:
+	mov	SGX_ENCLAVE_RUN_PTR(%rbp), %rbx
+
+	/* Set the exit_reason and exception info. */
+	movl	$SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx)
+
+	mov	%eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx)
+	mov	%di,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx)
+	mov	%si,  (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx)
+	mov	%rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx)
+	jmp	.Lhandle_exit
+
+.Linvoke_userspace_handler:
+	/* Pass the untrusted RSP (at exit) to the callback via %rcx. */
+	mov	%rsp, %rcx
+
+	/* Save @e, %rbx is about to be clobbered. */
+	mov	%rbx, %rax
+
+	/* Save the untrusted RSP offset in %rbx (non-volatile register). */
+	mov	%rsp, %rbx
+	and	$0xf, %rbx
+
+	/*
+	 * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
+	 * _after_ pushing the parameters on the stack, hence the bonus push.
+	 */
+	and	$-0x10, %rsp
+	push	%rax
+
+	/* Push @e as a param to the callback. */
+	push	%rax
+
+	/* Clear RFLAGS.DF per x86_64 ABI */
+	cld
+
+	/* Load the callback pointer to %rax and invoke it via retpoline. */
+	mov	SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax
+	call	.Lretpoline
+
+	/* Undo the post-exit %rsp adjustment. */
+	lea	0x10(%rsp, %rbx), %rsp
+
+	/*
+	 * If the return from callback is zero or negative, return immediately,
+	 * else re-execute ENCLU with the postive return value interpreted as
+	 * the requested ENCLU leaf.
+	 */
+	cmp	$0, %eax
+	jle	.Lout
+	jmp	.Lenter_enclave
+
+.Lretpoline:
+	call	2f
+1:	pause
+	lfence
+	jmp	1b
+2:	mov	%rax, (%rsp)
+	ret
+	.cfi_endproc
+
+_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
+
+SYM_FUNC_END(__vdso_sgx_enter_enclave)
diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h
new file mode 100644
index 000000000000..06157b3e9ede
--- /dev/null
+++ b/arch/x86/include/asm/enclu.h
@@ -0,0 +1,8 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ENCLU_H
+#define _ASM_X86_ENCLU_H
+
+#define EENTER	0x02
+#define ERESUME	0x03
+
+#endif /* _ASM_X86_ENCLU_H */
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index d0916fb9629e..1564d7f88597 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -72,4 +72,132 @@  struct sgx_enclave_provision {
 	__u64 attribute_fd;
 };
 
+#define SGX_SYNCHRONOUS_EXIT	0
+#define SGX_EXCEPTION_EXIT	1
+
+struct sgx_enclave_run;
+
+/**
+ * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
+ *					__vdso_sgx_enter_enclave()
+ *
+ * @rdi:	RDI at the time of enclave exit
+ * @rsi:	RSI at the time of enclave exit
+ * @rdx:	RDX at the time of enclave exit
+ * @ursp:	RSP at the time of enclave exit (untrusted stack)
+ * @r8:		R8 at the time of enclave exit
+ * @r9:		R9 at the time of enclave exit
+ * @r:		Pointer to struct sgx_enclave_run (as provided by caller)
+ *
+ * Return:
+ *  0 or negative to exit vDSO
+ *  positive to re-enter enclave (must be EENTER or ERESUME leaf)
+ */
+typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
+					  long ursp, long r8, long r9,
+					  struct sgx_enclave_run *r);
+
+/**
+ * struct sgx_enclave_exception - structure to report exceptions encountered in
+ *				  __vdso_sgx_enter_enclave()
+ *
+ * @leaf:	ENCLU leaf from \%eax at time of exception
+ * @trapnr:	exception trap number, a.k.a. fault vector
+ * @error_code:	exception error code
+ * @address:	exception address, e.g. CR2 on a #PF
+ */
+struct sgx_enclave_exception {
+	__u32 leaf;
+	__u16 trapnr;
+	__u16 error_code;
+	__u64 address;
+};
+
+/**
+ * struct sgx_enclave_run - Control structure for __vdso_sgx_enter_enclave()
+ *
+ * @tcs:		Thread Control Structure used to enter enclave
+ * @flags:		Control flags
+ * @exit_reason:	Cause of exit from enclave, e.g. EEXIT vs. exception
+ * @user_handler:	User provided exit handler (optional)
+ * @user_data:		User provided opaque value (optional)
+ * @exception:		Valid on exit due to exception
+ */
+struct sgx_enclave_run {
+	__u64 tcs;
+	__u32 flags;
+	__u32 exit_reason;
+
+	union {
+		sgx_enclave_exit_handler_t user_handler;
+		__u64 __user_handler;
+	};
+	__u64 user_data;
+
+	union {
+		struct sgx_enclave_exception exception;
+
+		/* Pad the entire struct to 256 bytes. */
+		__u8 pad[256 - 32];
+	};
+};
+
+/**
+ * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(),
+ *				      a vDSO function to enter an SGX enclave.
+ *
+ * @rdi:	Pass-through value for RDI
+ * @rsi:	Pass-through value for RSI
+ * @rdx:	Pass-through value for RDX
+ * @leaf:	ENCLU leaf, must be EENTER or ERESUME
+ * @r8:		Pass-through value for R8
+ * @r9:		Pass-through value for R9
+ * @r:		struct sgx_enclave_run, must be non-NULL
+ *
+ * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
+ * x86-64 ABI, e.g. doesn't handle XSAVE state.  Except for non-volatile
+ * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting
+ * state in accordance with the x86-64 ABI is the responsibility of the enclave
+ * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C
+ * code without careful consideration by both the enclave and its runtime.
+ *
+ * All general purpose registers except RAX, RBX and RCX are passed as-is to
+ * the enclave.  RAX, RBX and RCX are consumed by EENTER and ERESUME and are
+ * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
+ *
+ * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
+ * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
+ * All other registers are available for use by the enclave and its runtime,
+ * e.g. an enclave can push additional data onto the stack (and modify RSP) to
+ * pass information to the optional exit handler (see below).
+ *
+ * Most exceptions reported on ENCLU, including those that occur within the
+ * enclave, are fixed up and reported synchronously instead of being delivered
+ * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
+ * never fixed up and are always delivered via standard signals. On synchrously
+ * reported exceptions, -EFAULT is returned and details about the exception are
+ * recorded in @e, the optional sgx_enclave_exception struct.
+ *
+ * If an exit handler is provided, the handler will be invoked on synchronous
+ * exits from the enclave and for all synchronously reported exceptions. In
+ * latter case, @e is filled prior to invoking the handler.
+ *
+ * The exit handler's return value is interpreted as follows:
+ *  >0:		continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
+ *   0:		success, return @ret to the caller
+ *  <0:		error, return @ret to the caller
+ *
+ * The exit handler may transfer control, e.g. via longjmp() or C++ exception,
+ * without returning to __vdso_sgx_enter_enclave().
+ *
+ * Return:
+ *  0 on success (ENCLU reached),
+ *  -EINVAL if ENCLU leaf is not allowed,
+ *  -errno for all other negative values returned by the userspace exit handler
+ */
+typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
+					unsigned long rdx, unsigned int leaf,
+					unsigned long r8,  unsigned long r9,
+					struct sgx_enclave_run *r);
+
 #endif /* _UAPI_ASM_X86_SGX_H */