diff mbox series

[v3,47/75] x86/sev-es: Add Runtime #VC Exception Handler

Message ID 20200428151725.31091-48-joro@8bytes.org (mailing list archive)
State New, archived
Headers show
Series x86: SEV-ES Guest Support | expand

Commit Message

Joerg Roedel April 28, 2020, 3:16 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

Add the handler for #VC exceptions invoked at runtime.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/entry/entry_64.S    |   4 +
 arch/x86/include/asm/traps.h |   7 ++
 arch/x86/kernel/idt.c        |   4 +-
 arch/x86/kernel/sev-es.c     | 167 ++++++++++++++++++++++++++++++++++-
 4 files changed, 180 insertions(+), 2 deletions(-)

Comments

Borislav Petkov May 23, 2020, 7:59 a.m. UTC | #1
On Tue, Apr 28, 2020 at 05:16:57PM +0200, Joerg Roedel wrote:
> diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
> index a4fa7f351bf2..bc3a58427028 100644
> --- a/arch/x86/kernel/sev-es.c
> +++ b/arch/x86/kernel/sev-es.c
> @@ -10,6 +10,7 @@
>  #include <linux/sched/debug.h>	/* For show_regs() */
>  #include <linux/percpu-defs.h>
>  #include <linux/mem_encrypt.h>
> +#include <linux/lockdep.h>
>  #include <linux/printk.h>
>  #include <linux/mm_types.h>
>  #include <linux/set_memory.h>
> @@ -25,7 +26,7 @@
>  #include <asm/insn-eval.h>
>  #include <asm/fpu/internal.h>
>  #include <asm/processor.h>
> -#include <asm/trap_defs.h>
> +#include <asm/traps.h>
>  #include <asm/svm.h>
>  
>  /* For early boot hypervisor communication in SEV-ES enabled guests */
> @@ -46,10 +47,26 @@ struct sev_es_runtime_data {
>  
>  	/* Physical storage for the per-cpu IST stacks of the #VC handler */
>  	struct vmm_exception_stacks vc_stacks __aligned(PAGE_SIZE);
> +
> +	/* Reserve on page per CPU as backup storage for the unencrypted GHCB */

		  one

> +	struct ghcb backup_ghcb;

I could use some text explaining what those backups are for?

> +	/*
> +	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
> +	 * There is no need for it to be atomic, because nothing is written to
> +	 * the GHCB between the read and the write of ghcb_active. So it is safe
> +	 * to use it when a nested #VC exception happens before the write.
> +	 */

Looks liks that is that text... support for nested #VC exceptions.
I'm sure this has come up already but why do we even want to support
nested #VCs? IOW, can we do without them first or are they absolutely
necessary?

I'm guessing VC exceptions inside the VC handler but what are the
sensible use cases?

Thx.
Joerg Roedel June 11, 2020, 11:48 a.m. UTC | #2
On Sat, May 23, 2020 at 09:59:24AM +0200, Borislav Petkov wrote:
> On Tue, Apr 28, 2020 at 05:16:57PM +0200, Joerg Roedel wrote:
> > +	/*
> > +	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
> > +	 * There is no need for it to be atomic, because nothing is written to
> > +	 * the GHCB between the read and the write of ghcb_active. So it is safe
> > +	 * to use it when a nested #VC exception happens before the write.
> > +	 */
> 
> Looks liks that is that text... support for nested #VC exceptions.
> I'm sure this has come up already but why do we even want to support
> nested #VCs? IOW, can we do without them first or are they absolutely
> necessary?
> 
> I'm guessing VC exceptions inside the VC handler but what are the
> sensible use cases?

The most important use-case is #VC->NMI->#VC. When an NMI hits while the
#VC handler uses the GHCB and the NMI handler causes another #VC, then
the contents of the GHCB needs to be backed up, so that it doesn't
destroy the GHCB contents of the first #VC handling path.


Regards,

	Joerg
Joerg Roedel June 11, 2020, 11:53 a.m. UTC | #3
On Sat, May 23, 2020 at 09:59:24AM +0200, Borislav Petkov wrote:
> On Tue, Apr 28, 2020 at 05:16:57PM +0200, Joerg Roedel wrote:
> > +	struct ghcb backup_ghcb;
> 
> I could use some text explaining what those backups are for?

I added another comment above that line to better explain why it is
needed.


	Joerg
Sean Christopherson June 11, 2020, 5:38 p.m. UTC | #4
On Thu, Jun 11, 2020 at 01:48:31PM +0200, Joerg Roedel wrote:
> On Sat, May 23, 2020 at 09:59:24AM +0200, Borislav Petkov wrote:
> > On Tue, Apr 28, 2020 at 05:16:57PM +0200, Joerg Roedel wrote:
> > > +	/*
> > > +	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
> > > +	 * There is no need for it to be atomic, because nothing is written to
> > > +	 * the GHCB between the read and the write of ghcb_active. So it is safe
> > > +	 * to use it when a nested #VC exception happens before the write.
> > > +	 */
> > 
> > Looks liks that is that text... support for nested #VC exceptions.
> > I'm sure this has come up already but why do we even want to support
> > nested #VCs? IOW, can we do without them first or are they absolutely
> > necessary?
> > 
> > I'm guessing VC exceptions inside the VC handler but what are the
> > sensible use cases?
> 
> The most important use-case is #VC->NMI->#VC. When an NMI hits while the
> #VC handler uses the GHCB and the NMI handler causes another #VC, then
> the contents of the GHCB needs to be backed up, so that it doesn't
> destroy the GHCB contents of the first #VC handling path.

Isn't it possible for the #VC handler to hit a #PF, e.g. on copy_from_user()
in insn_fetch_from_user()?  If that happens, what prevents the #PF handler
from hitting a #VC?  AIUI, do_vmm_communication() panics if the backup GHCB
is already in use, e.g. #VC->#PF->#VC->NMI->#VC would be fatal.
Joerg Roedel June 11, 2020, 6:16 p.m. UTC | #5
On Thu, Jun 11, 2020 at 10:38:48AM -0700, Sean Christopherson wrote:
> Isn't it possible for the #VC handler to hit a #PF, e.g. on copy_from_user()
> in insn_fetch_from_user()?  If that happens, what prevents the #PF handler
> from hitting a #VC?  AIUI, do_vmm_communication() panics if the backup GHCB
> is already in use, e.g. #VC->#PF->#VC->NMI->#VC would be fatal.

This would be possible, if the #PF handler is able to trigger a #VC. But
I am not sure how it could, except when it has to call printk.

If #PF can trigger a #VC, then a fix is to further limit the time the
GHCB is active, and make sure there are not faults of any kind when it
actually is active. Then the only vector to care about is NMI. I will
look into that.

Regards,

	Joerg
Borislav Petkov June 12, 2020, 1:13 p.m. UTC | #6
On Thu, Jun 11, 2020 at 01:48:31PM +0200, Joerg Roedel wrote:
> The most important use-case is #VC->NMI->#VC. When an NMI hits while the
> #VC handler uses the GHCB and the NMI handler causes another #VC, then
> the contents of the GHCB needs to be backed up, so that it doesn't
> destroy the GHCB contents of the first #VC handling path.

That's a good example, please add it to the next version of the patch,
preferrably in a comment somewhere.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..4c392eb2f063 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1210,6 +1210,10 @@  idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
 idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+idtentry vmm_communication	do_vmm_communication	has_error_code=1	paranoid=1 shift_ist=IST_INDEX_VC ist_offset=VC_STACK_OFFSET
+#endif
+
 /*
  * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 104991c05425..37f6e86ac53a 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,6 +35,9 @@  asmlinkage void alignment_check(void);
 #ifdef CONFIG_X86_MCE
 asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+asmlinkage void vmm_communication(void);
+#endif
 asmlinkage void simd_coprocessor_error(void);
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
@@ -83,6 +86,10 @@  dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_co
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
 dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
 dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code);
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+dotraplinkage void do_vmm_communication_error(struct pt_regs *regs,
+					      long error_code);
+#endif
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code);
 #endif
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 135d208a2d38..e32cc5f3fa94 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -88,7 +88,6 @@  static const __initconst struct idt_data def_idts[] = {
 #ifdef CONFIG_X86_MCE
 	INTG(X86_TRAP_MC,		&machine_check),
 #endif
-
 	SYSG(X86_TRAP_OF,		overflow),
 #if defined(CONFIG_IA32_EMULATION)
 	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_compat),
@@ -185,6 +184,9 @@  static const __initconst struct idt_data ist_idts[] = {
 #ifdef CONFIG_X86_MCE
 	ISTG(X86_TRAP_MC,	&machine_check,	IST_INDEX_MCE),
 #endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	ISTG(X86_TRAP_VC,	vmm_communication, IST_INDEX_VC),
+#endif
 };
 
 /*
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index a4fa7f351bf2..bc3a58427028 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -10,6 +10,7 @@ 
 #include <linux/sched/debug.h>	/* For show_regs() */
 #include <linux/percpu-defs.h>
 #include <linux/mem_encrypt.h>
+#include <linux/lockdep.h>
 #include <linux/printk.h>
 #include <linux/mm_types.h>
 #include <linux/set_memory.h>
@@ -25,7 +26,7 @@ 
 #include <asm/insn-eval.h>
 #include <asm/fpu/internal.h>
 #include <asm/processor.h>
-#include <asm/trap_defs.h>
+#include <asm/traps.h>
 #include <asm/svm.h>
 
 /* For early boot hypervisor communication in SEV-ES enabled guests */
@@ -46,10 +47,26 @@  struct sev_es_runtime_data {
 
 	/* Physical storage for the per-cpu IST stacks of the #VC handler */
 	struct vmm_exception_stacks vc_stacks __aligned(PAGE_SIZE);
+
+	/* Reserve on page per CPU as backup storage for the unencrypted GHCB */
+	struct ghcb backup_ghcb;
+
+	/*
+	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+	 * There is no need for it to be atomic, because nothing is written to
+	 * the GHCB between the read and the write of ghcb_active. So it is safe
+	 * to use it when a nested #VC exception happens before the write.
+	 */
+	bool ghcb_active;
+	bool backup_ghcb_active;
 };
 
 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
 
+struct ghcb_state {
+	struct ghcb *ghcb;
+};
+
 /*
  * Shift/Unshift the IST entry for the #VC handler during
  * nmi_enter()/nmi_exit().  This is needed when an NMI hits in the #VC handlers
@@ -70,6 +87,53 @@  void sev_es_nmi_exit(void)
 	tss->x86_tss.ist[IST_INDEX_VC] += VC_STACK_OFFSET;
 }
 
+static struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (unlikely(data->ghcb_active)) {
+		/* GHCB is already in use - save its contents */
+
+		if (unlikely(data->backup_ghcb_active))
+			return NULL;
+
+		/* Mark backup_ghcb active before writing to it */
+		data->backup_ghcb_active = true;
+
+		state->ghcb = &data->backup_ghcb;
+
+		/* Backup GHCB content */
+		*state->ghcb = *ghcb;
+	} else {
+		state->ghcb = NULL;
+		data->ghcb_active = true;
+	}
+
+	return ghcb;
+}
+
+static void sev_es_put_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (state->ghcb) {
+		/* Restore GHCB from Backup */
+		*ghcb = *state->ghcb;
+		data->backup_ghcb_active = false;
+		state->ghcb = NULL;
+	} else {
+		data->ghcb_active = false;
+	}
+}
+
 /* Needed in vc_early_vc_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -263,6 +327,9 @@  static void __init sev_es_init_ghcb(int cpu)
 		panic("Can not map GHCBs unencrypted");
 
 	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
+
+	data->ghcb_active = false;
+	data->backup_ghcb_active = false;
 }
 
 static void __init init_vc_stack_names(void)
@@ -367,6 +434,104 @@  static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 	return result;
 }
 
+static void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+	long error_code = ctxt->fi.error_code;
+	int trapnr = ctxt->fi.vector;
+
+	ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+	switch (trapnr) {
+	case X86_TRAP_GP:
+		do_general_protection(ctxt->regs, error_code);
+		break;
+	case X86_TRAP_UD:
+		do_invalid_op(ctxt->regs, 0);
+		break;
+	default:
+		pr_emerg("ERROR: Unsupported exception in #VC instruction emulation - can't continue\n");
+		BUG();
+	}
+}
+
+dotraplinkage void do_vmm_communication(struct pt_regs *regs,
+					unsigned long exit_code)
+{
+	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+	struct ghcb_state state;
+	struct es_em_ctxt ctxt;
+	enum es_result result;
+	struct ghcb *ghcb;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * This is invoked through an interrupt gate, so IRQs are disabled. The
+	 * code below might walk page-tables for user or kernel addresses, so
+	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
+	 */
+
+	ghcb = sev_es_get_ghcb(&state);
+	if (!ghcb) {
+		/*
+		 * Mark GHCBs inactive so that panic() is able to print the
+		 * message.
+		 */
+		data->ghcb_active        = false;
+		data->backup_ghcb_active = false;
+
+		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+	}
+
+	vc_ghcb_invalidate(ghcb);
+	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+
+	if (result == ES_OK)
+		result = vc_handle_exitcode(&ctxt, ghcb, exit_code);
+
+	sev_es_put_ghcb(&state);
+
+	/* Done - now check the result */
+	switch (result) {
+	case ES_OK:
+		vc_finish_insn(&ctxt);
+		break;
+	case ES_UNSUPPORTED:
+		pr_emerg("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+			 exit_code, regs->ip);
+		goto fail;
+	case ES_VMM_ERROR:
+		pr_emerg("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+			 exit_code, regs->ip);
+		goto fail;
+	case ES_DECODE_FAILED:
+		pr_emerg("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+			 exit_code, regs->ip);
+		goto fail;
+	case ES_EXCEPTION:
+		vc_forward_exception(&ctxt);
+		break;
+	case ES_RETRY:
+		/* Nothing to do */
+		break;
+	default:
+		pr_emerg("PANIC: Unknown result in %s():%d\n", __func__, result);
+		/*
+		 * Emulating the instruction which caused the #VC exception
+		 * failed - can't continue so print debug information
+		 */
+		BUG();
+	}
+
+	return;
+
+fail:
+	show_regs(regs);
+
+	while (true)
+		halt();
+}
+
 bool __init vc_boot_ghcb(struct pt_regs *regs)
 {
 	unsigned long exit_code = regs->orig_ax;