diff mbox series

[10/10] x86/virt/tdx: Allow SEAMCALL to handle #UD and #GP

Message ID d08c5e27dd7377564d69648f3eb7b56d3c95b84b.1689151537.git.kai.huang@intel.com (mailing list archive)
State New, archived
Headers show
Series Unify TDCALL/SEAMCALL and TDVMCALL assembly | expand

Commit Message

Huang, Kai July 12, 2023, 8:55 a.m. UTC
On the platform with the "partial write machine check" erratum, a kernel
partial write to TDX private memory may cause unexpected machine check.
It would be nice if the #MC handler could print additional information
to show the #MC was TDX private memory error due to possible kernel bug.

To do that, the machine check handler needs to use SEAMCALL to query
page type of the error memory from the TDX module, because there's no
existing infrastructure to track TDX private pages.

SEAMCALL instruction causes #UD if CPU isn't in VMX operation.  In #MC
handler, it is legal that CPU isn't in VMX operation when making this
SEAMCALL.  Extend the TDX_MODULE_CALL macro to handle #UD so the
SEAMCALL can return error code instead of Oops in the #MC handler.
Opportunistically handles #GP too since they share the same code.

A bonus is when kernel mistakenly calls SEAMCALL when CPU isn't in VMX
operation, or when TDX isn't enabled by the BIOS, or when the BIOS is
buggy, the kernel can get a nicer error message rather than a less
understandable Oops.

This is basically based on Peter's code.

Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/include/asm/tdx.h      |  5 +++++
 arch/x86/virt/vmx/tdx/tdxcall.S | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+)

Comments

Peter Zijlstra July 13, 2023, 8:07 a.m. UTC | #1
On Wed, Jul 12, 2023 at 08:55:24PM +1200, Kai Huang wrote:
> @@ -85,6 +86,7 @@
>  	.endif	/* \saved */
>  
>  	.if \host
> +1:
>  	seamcall
>  	/*
>  	 * SEAMCALL instruction is essentially a VMExit from VMX root
> @@ -99,6 +101,7 @@
>  	 */
>  	mov $TDX_SEAMCALL_VMFAILINVALID, %rdi
>  	cmovc %rdi, %rax
> +2:
>  	.else
>  	tdcall
>  	.endif

This is just wrong, if the thing traps you should not do the return
registers. And at this point the mov/cmovc thing doesn't make much sense
either.

> @@ -185,4 +188,21 @@
>  
>  	FRAME_END
>  	RET
> +
> +	.if \host
> +3:
> +	/*
> +	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
> +	 * the trap number.  Convert the trap number to the TDX error
> +	 * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
> +	 *
> +	 * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
> +	 * only accepts 32-bit immediate at most.
> +	 */
> +	movq $TDX_SW_ERROR, %r12
> +	orq  %r12, %rax
> +	jmp  2b
> +
> +	_ASM_EXTABLE_FAULT(1b, 3b)
> +	.endif	/* \host */
>  .endm

Also, please used named labels where possible and *please* keep asm
directives unindented.


--- a/arch/x86/virt/vmx/tdx/tdxcall.S
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -56,7 +56,7 @@
 	movq	TDX_MODULE_r10(%rsi), %r10
 	movq	TDX_MODULE_r11(%rsi), %r11
 
-	.if \saved
+.if \saved
 	/*
 	 * Move additional input regs from the structure.  For simplicity
 	 * assume that anything needs the callee-saved regs also tramples
@@ -75,18 +75,18 @@
 	movq	TDX_MODULE_r15(%rsi), %r15
 	movq	TDX_MODULE_rbx(%rsi), %rbx
 
-	.if \ret
+.if \ret
 	/* Save the structure pointer as %rsi is about to be clobbered */
 	pushq	%rsi
-	.endif
+.endif
 
 	movq	TDX_MODULE_rdi(%rsi), %rdi
 	/* %rsi needs to be done at last */
 	movq	TDX_MODULE_rsi(%rsi), %rsi
-	.endif	/* \saved */
+.endif	/* \saved */
 
-	.if \host
-1:
+.if \host
+.Lseamcall:
 	seamcall
 	/*
 	 * SEAMCALL instruction is essentially a VMExit from VMX root
@@ -99,15 +99,13 @@
 	 * This value will never be used as actual SEAMCALL error code as
 	 * it is from the Reserved status code class.
 	 */
-	mov $TDX_SEAMCALL_VMFAILINVALID, %rdi
-	cmovc %rdi, %rax
-2:
-	.else
+	jc	.Lseamfail
+.else
 	tdcall
-	.endif
+.endif
 
-	.if \ret
-	.if \saved
+.if \ret
+.if \saved
 	/*
 	 * Restore the structure from stack to saved the output registers
 	 *
@@ -136,7 +134,7 @@
 	movq %r15, TDX_MODULE_r15(%rsi)
 	movq %rbx, TDX_MODULE_rbx(%rsi)
 	movq %rdi, TDX_MODULE_rdi(%rsi)
-	.endif	/* \saved */
+.endif	/* \saved */
 
 	/* Copy output regs to the structure */
 	movq %rcx, TDX_MODULE_rcx(%rsi)
@@ -145,10 +143,11 @@
 	movq %r9,  TDX_MODULE_r9(%rsi)
 	movq %r10, TDX_MODULE_r10(%rsi)
 	movq %r11, TDX_MODULE_r11(%rsi)
-	.endif	/* \ret */
+.endif	/* \ret */
 
-	.if \saved
-	.if \ret
+.Lout:
+.if \saved
+.if \ret
 	/*
 	 * Clear registers shared by guest for VP.ENTER and VP.VMCALL to
 	 * prevent speculative use of values from guest/VMM, including
@@ -170,13 +169,8 @@
 	xorq %r9,  %r9
 	xorq %r10, %r10
 	xorq %r11, %r11
-	xorq %r12, %r12
-	xorq %r13, %r13
-	xorq %r14, %r14
-	xorq %r15, %r15
-	xorq %rbx, %rbx
 	xorq %rdi, %rdi
-	.endif	/* \ret */
+.endif	/* \ret */
 
 	/* Restore callee-saved GPRs as mandated by the x86_64 ABI */
 	popq	%r15
@@ -184,13 +178,17 @@
 	popq	%r13
 	popq	%r12
 	popq	%rbx
-	.endif	/* \saved */
+.endif	/* \saved */
 
 	FRAME_END
 	RET
 
-	.if \host
-3:
+.if \host
+.Lseamfail:
+	mov	$TDX_SEAMCALL_VMFAILINVALID, %rax
+	jmp	.Lout
+
+.Lseamtrap:
 	/*
 	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
 	 * the trap number.  Convert the trap number to the TDX error
@@ -201,8 +199,8 @@
 	 */
 	movq $TDX_SW_ERROR, %r12
 	orq  %r12, %rax
-	jmp  2b
+	jmp .Lout
 
-	_ASM_EXTABLE_FAULT(1b, 3b)
-	.endif	/* \host */
+	_ASM_EXTABLE_FAULT(.Lseamcall, .Lseamtrap)
+.endif	/* \host */
 .endm
Huang, Kai July 13, 2023, 9:58 a.m. UTC | #2
On Thu, 2023-07-13 at 10:07 +0200, Peter Zijlstra wrote:
> On Wed, Jul 12, 2023 at 08:55:24PM +1200, Kai Huang wrote:
> > @@ -85,6 +86,7 @@
> >  	.endif	/* \saved */
> >  
> >  	.if \host
> > +1:
> >  	seamcall
> >  	/*
> >  	 * SEAMCALL instruction is essentially a VMExit from VMX root
> > @@ -99,6 +101,7 @@
> >  	 */
> >  	mov $TDX_SEAMCALL_VMFAILINVALID, %rdi
> >  	cmovc %rdi, %rax
> > +2:
> >  	.else
> >  	tdcall
> >  	.endif
> 
> This is just wrong, if the thing traps you should not do the return
> registers. And at this point the mov/cmovc thing doesn't make much sense
> either.

OK will do.  Yes "do return registers" isn't necessary.  I thought to keep code
simple we can just do it.  The trap/VMFAILINVALID code path isn't in performance
path anyway.

This is a problem in the current upstream code too.  I'll fix it first in a
separate patch.

> 
> > @@ -185,4 +188,21 @@
> >  
> >  	FRAME_END
> >  	RET
> > +
> > +	.if \host
> > +3:
> > +	/*
> > +	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
> > +	 * the trap number.  Convert the trap number to the TDX error
> > +	 * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
> > +	 *
> > +	 * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
> > +	 * only accepts 32-bit immediate at most.
> > +	 */
> > +	movq $TDX_SW_ERROR, %r12
> > +	orq  %r12, %rax
> > +	jmp  2b
> > +
> > +	_ASM_EXTABLE_FAULT(1b, 3b)
> > +	.endif	/* \host */
> >  .endm
> 
> Also, please used named labels where possible and *please* keep asm
> directives unindented.

Yes will do.

> 
> 
> --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> @@ -56,7 +56,7 @@
>  	movq	TDX_MODULE_r10(%rsi), %r10
>  	movq	TDX_MODULE_r11(%rsi), %r11
>  
> -	.if \saved
> +.if \saved
>  	/*
>  	 * Move additional input regs from the structure.  For simplicity
>  	 * assume that anything needs the callee-saved regs also tramples
> @@ -75,18 +75,18 @@
>  	movq	TDX_MODULE_r15(%rsi), %r15
>  	movq	TDX_MODULE_rbx(%rsi), %rbx
>  
> -	.if \ret
> +.if \ret
>  	/* Save the structure pointer as %rsi is about to be clobbered */
>  	pushq	%rsi
> -	.endif
> +.endif
>  
>  	movq	TDX_MODULE_rdi(%rsi), %rdi
>  	/* %rsi needs to be done at last */
>  	movq	TDX_MODULE_rsi(%rsi), %rsi
> -	.endif	/* \saved */
> +.endif	/* \saved */
>  
> -	.if \host
> -1:
> +.if \host
> +.Lseamcall:
>  	seamcall
>  	/*
>  	 * SEAMCALL instruction is essentially a VMExit from VMX root
> @@ -99,15 +99,13 @@
>  	 * This value will never be used as actual SEAMCALL error code as
>  	 * it is from the Reserved status code class.
>  	 */
> -	mov $TDX_SEAMCALL_VMFAILINVALID, %rdi
> -	cmovc %rdi, %rax
> -2:
> -	.else
> +	jc	.Lseamfail
> +.else
>  	tdcall
> -	.endif
> +.endif
>  
> -	.if \ret
> -	.if \saved
> +.if \ret
> +.if \saved
>  	/*
>  	 * Restore the structure from stack to saved the output registers
>  	 *
> @@ -136,7 +134,7 @@
>  	movq %r15, TDX_MODULE_r15(%rsi)
>  	movq %rbx, TDX_MODULE_rbx(%rsi)
>  	movq %rdi, TDX_MODULE_rdi(%rsi)
> -	.endif	/* \saved */
> +.endif	/* \saved */
>  
>  	/* Copy output regs to the structure */
>  	movq %rcx, TDX_MODULE_rcx(%rsi)
> @@ -145,10 +143,11 @@
>  	movq %r9,  TDX_MODULE_r9(%rsi)
>  	movq %r10, TDX_MODULE_r10(%rsi)
>  	movq %r11, TDX_MODULE_r11(%rsi)
> -	.endif	/* \ret */
> +.endif	/* \ret */
>  
> -	.if \saved
> -	.if \ret
> +.Lout:
> +.if \saved
> +.if \ret
>  	/*
>  	 * Clear registers shared by guest for VP.ENTER and VP.VMCALL to
>  	 * prevent speculative use of values from guest/VMM, including
> @@ -170,13 +169,8 @@
>  	xorq %r9,  %r9
>  	xorq %r10, %r10
>  	xorq %r11, %r11
> -	xorq %r12, %r12
> -	xorq %r13, %r13
> -	xorq %r14, %r14
> -	xorq %r15, %r15
> -	xorq %rbx, %rbx
>  	xorq %rdi, %rdi
> -	.endif	/* \ret */
> +.endif	/* \ret */
>  
>  	/* Restore callee-saved GPRs as mandated by the x86_64 ABI */
>  	popq	%r15
> @@ -184,13 +178,17 @@
>  	popq	%r13
>  	popq	%r12
>  	popq	%rbx
> -	.endif	/* \saved */
> +.endif	/* \saved */
>  
>  	FRAME_END
>  	RET
>  
> -	.if \host
> -3:
> +.if \host
> +.Lseamfail:
> +	mov	$TDX_SEAMCALL_VMFAILINVALID, %rax
> +	jmp	.Lout
> +
> +.Lseamtrap:
>  	/*
>  	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
>  	 * the trap number.  Convert the trap number to the TDX error
> @@ -201,8 +199,8 @@
>  	 */
>  	movq $TDX_SW_ERROR, %r12
>  	orq  %r12, %rax
> -	jmp  2b
> +	jmp .Lout

Thanks for the code.

There might be stack balancing issue here.  I'll double check when updating this
patch.

Thanks!

>  
> -	_ASM_EXTABLE_FAULT(1b, 3b)
> -	.endif	/* \host */
> +	_ASM_EXTABLE_FAULT(.Lseamcall, .Lseamtrap)
> +.endif	/* \host */
>  .endm
diff mbox series

Patch

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index a82e5249d079..feb85316346e 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -8,6 +8,8 @@ 
 #include <asm/ptrace.h>
 #include <asm/shared/tdx.h>
 
+#include <asm/trapnr.h>
+
 /*
  * SW-defined error codes.
  *
@@ -18,6 +20,9 @@ 
 #define TDX_SW_ERROR			(TDX_ERROR | GENMASK_ULL(47, 40))
 #define TDX_SEAMCALL_VMFAILINVALID	(TDX_SW_ERROR | _UL(0xFFFF0000))
 
+#define TDX_SEAMCALL_GP			(TDX_SW_ERROR | X86_TRAP_GP)
+#define TDX_SEAMCALL_UD			(TDX_SW_ERROR | X86_TRAP_UD)
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
index e4e90ebf5dad..04b0c466f38c 100644
--- a/arch/x86/virt/vmx/tdx/tdxcall.S
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -2,6 +2,7 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/frame.h>
 #include <asm/tdx.h>
+#include <asm/asm.h>
 
 /*
  * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
@@ -85,6 +86,7 @@ 
 	.endif	/* \saved */
 
 	.if \host
+1:
 	seamcall
 	/*
 	 * SEAMCALL instruction is essentially a VMExit from VMX root
@@ -99,6 +101,7 @@ 
 	 */
 	mov $TDX_SEAMCALL_VMFAILINVALID, %rdi
 	cmovc %rdi, %rax
+2:
 	.else
 	tdcall
 	.endif
@@ -185,4 +188,21 @@ 
 
 	FRAME_END
 	RET
+
+	.if \host
+3:
+	/*
+	 * SEAMCALL caused #GP or #UD.  By reaching here %eax contains
+	 * the trap number.  Convert the trap number to the TDX error
+	 * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
+	 *
+	 * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
+	 * only accepts 32-bit immediate at most.
+	 */
+	movq $TDX_SW_ERROR, %r12
+	orq  %r12, %rax
+	jmp  2b
+
+	_ASM_EXTABLE_FAULT(1b, 3b)
+	.endif	/* \host */
 .endm