diff mbox series

[v5,2/6] x86/sev-es: Make sure IRQs are disabled while GHCB is active

Message ID 20210614135327.9921-3-joro@8bytes.org (mailing list archive)
State New, archived
Headers show
Series x86/sev-es: Fixes for SEV-ES Guest Support | expand

Commit Message

Joerg Roedel June 14, 2021, 1:53 p.m. UTC
From: Joerg Roedel <jroedel@suse.de>

The #VC handler only cares about IRQs being disabled while the GHCB is
active, as it must not be interrupted by something which could cause
another #VC while it holds the GHCB (NMI is the exception for which the
backup GHCB exits).

Make sure nothing interrupts the code path while the GHCB is active by
disabling IRQs in sev_es_get_ghcb() and restoring the previous irq state
in sev_es_put_ghcb().

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/kernel/sev.c | 48 ++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 14 deletions(-)

Comments

Borislav Petkov June 14, 2021, 4:25 p.m. UTC | #1
On Mon, Jun 14, 2021 at 03:53:23PM +0200, Joerg Roedel wrote:
> From: Joerg Roedel <jroedel@suse.de>
> 
> The #VC handler only cares about IRQs being disabled while the GHCB is
> active, as it must not be interrupted by something which could cause
> another #VC while it holds the GHCB (NMI is the exception for which the
> backup GHCB exits).
> 
> Make sure nothing interrupts the code path while the GHCB is active by
> disabling IRQs in sev_es_get_ghcb() and restoring the previous irq state
> in sev_es_put_ghcb().
> 
> Signed-off-by: Joerg Roedel <jroedel@suse.de>
> ---
>  arch/x86/kernel/sev.c | 48 ++++++++++++++++++++++++++++++-------------
>  1 file changed, 34 insertions(+), 14 deletions(-)

Here's a diff ontop of yours with a couple of points:

* I've named the low-level, interrupts-enabled workers
__sev_get_ghcb()/__sev_put_ghcb() to mean a couple of things:

** underscored to mean, that callers need to disable local locks. There's
also a lockdep_assert_irqs_disabled() to make sure, both in the get and
put function.

** also only "sev" in the name because this code is not used for SEV-ES
only anymore.

* I've done it this way because you have a well-recognized code pattern
where the caller disables interrupts, calls the low-level helpers and
then enables interrupts again when done. VS passing a flags pointer back
and forth which just looks weird.

And as to being easy to use - users can botch flags too, when passing
around so they can just as well do proper interrupts toggling like a
gazillion other places in the kernel.

Also, you have places like exc_vmm_communication() where you have
to artifically pass in flags - I'm looking at your previous version
- even if you already make sure interrupts are disabled with the
BUG_ON assertion on entry. So in those cases you can simply call the
interrupt-enabled, __-variants.

Btw, while we're on exc_vmm_communication, it has a:

	BUG_ON(!irqs_disabled());

on entry and then later

	lockdep_assert_irqs_disabled();

and that second assertion is not really needed, methinks. So a hunk
below removes it.

Thoughts?

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 7d70cddc38be..b85c4a2be9fa 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -192,11 +192,19 @@ void noinstr __sev_es_ist_exit(void)
 	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
 }
 
-static __always_inline struct ghcb *__sev_es_get_ghcb(struct ghcb_state *state)
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+static __always_inline struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
 
+	lockdep_assert_irqs_disabled();
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;
 
@@ -231,18 +239,6 @@ static __always_inline struct ghcb *__sev_es_get_ghcb(struct ghcb_state *state)
 	return ghcb;
 }
 
-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state,
-						    unsigned long *flags)
-{
-	/*
-	 * Nothing shall interrupt this code path while holding the per-cpu
-	 * GHCB. The backup GHCB is only for NMIs interrupting this path.
-	 */
-	local_irq_save(*flags);
-
-	return __sev_es_get_ghcb(state);
-}
-
 /* Needed in vc_early_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -491,11 +487,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"
 
-static __always_inline void __sev_es_put_ghcb(struct ghcb_state *state)
+static __always_inline void __sev_put_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
 
+	lockdep_assert_irqs_disabled();
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;
 
@@ -514,13 +512,6 @@ static __always_inline void __sev_es_put_ghcb(struct ghcb_state *state)
 	}
 }
 
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state,
-					    unsigned long flags)
-{
-	__sev_es_put_ghcb(state);
-	local_irq_restore(flags);
-}
-
 void noinstr __sev_es_nmi_complete(void)
 {
 	struct ghcb_state state;
@@ -528,7 +519,7 @@ void noinstr __sev_es_nmi_complete(void)
 
 	BUG_ON(!irqs_disabled());
 
-	ghcb = __sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
@@ -538,7 +529,7 @@ void noinstr __sev_es_nmi_complete(void)
 	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
 	VMGEXIT();
 
-	__sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 }
 
 static u64 get_jump_table_addr(void)
@@ -548,7 +539,9 @@ static u64 get_jump_table_addr(void)
 	struct ghcb *ghcb;
 	u64 ret = 0;
 
-	ghcb = sev_es_get_ghcb(&state, &flags);
+	local_irq_save(flags);
+
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
@@ -562,7 +555,9 @@ static u64 get_jump_table_addr(void)
 	    ghcb_sw_exit_info_2_is_valid(ghcb))
 		ret = ghcb->save.sw_exit_info_2;
 
-	sev_es_put_ghcb(&state, flags);
+	__sev_put_ghcb(&state);
+
+	local_irq_restore(flags);
 
 	return ret;
 }
@@ -686,7 +681,9 @@ static void sev_es_ap_hlt_loop(void)
 	unsigned long flags;
 	struct ghcb *ghcb;
 
-	ghcb = sev_es_get_ghcb(&state, &flags);
+	local_irq_save(flags);
+
+	ghcb = __sev_get_ghcb(&state);
 
 	while (true) {
 		vc_ghcb_invalidate(ghcb);
@@ -703,7 +700,9 @@ static void sev_es_ap_hlt_loop(void)
 			break;
 	}
 
-	sev_es_put_ghcb(&state, flags);
+	__sev_put_ghcb(&state);
+
+	local_irq_restore(flags);
 }
 
 /*
@@ -1364,7 +1363,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	}
 
 	irq_state = irqentry_nmi_enter(regs);
-	lockdep_assert_irqs_disabled();
 	instrumentation_begin();
 
 	/*
@@ -1373,7 +1371,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
 	 */
 
-	ghcb = __sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
@@ -1381,7 +1379,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	if (result == ES_OK)
 		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
 
-	__sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 
 	/* Done - now check the result */
 	switch (result) {
Borislav Petkov June 14, 2021, 4:30 p.m. UTC | #2
On Mon, Jun 14, 2021 at 06:25:18PM +0200, Borislav Petkov wrote:
> ** underscored to mean, that callers need to disable local locks. There's
							     ^^^^^^

"interrupts" ofc.
Joerg Roedel June 15, 2021, 9:37 a.m. UTC | #3
On Mon, Jun 14, 2021 at 06:25:18PM +0200, Borislav Petkov wrote:
> Thoughts?

Okay, I tested a bit with this, it mostly works fine.

The lockdep_hardirqs_disabled() check in __sev_get/put_ghcb() was
problematic, because these functions are called also when no state is
set up yet. More specifically it is called from __sev_es_nmi_complete(),
which is called at the very beginning of do_nmi(), before the NMI
handler entered NMI mode. So this triggered a warning in the NMI
test-suite when booting up, I replaced these checks with a
WARN_ON(!irqs_disabled()) and also removed the BUG_ON()s.

With that it boots fine and without SEV-ES related warnings.

Regards,

	Joerg
diff mbox series

Patch

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fd997bbf059..f1bd95d451c3 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -192,7 +192,7 @@  void noinstr __sev_es_ist_exit(void)
 	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
 }
 
-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+static __always_inline struct ghcb *__sev_es_get_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
@@ -231,6 +231,18 @@  static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 	return ghcb;
 }
 
+static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state,
+						    unsigned long *flags)
+{
+	/*
+	 * Nothing shall interrupt this code path while holding the per-cpu
+	 * GHCB. The backup GHCB is only for NMIs interrupting this path.
+	 */
+	local_irq_save(*flags);
+
+	return __sev_es_get_ghcb(state);
+}
+
 /* Needed in vc_early_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -479,7 +491,7 @@  static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"
 
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+static __always_inline void __sev_es_put_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
@@ -502,12 +514,21 @@  static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
 	}
 }
 
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state,
+					    unsigned long flags)
+{
+	__sev_es_put_ghcb(state);
+	local_irq_restore(flags);
+}
+
 void noinstr __sev_es_nmi_complete(void)
 {
 	struct ghcb_state state;
 	struct ghcb *ghcb;
 
-	ghcb = sev_es_get_ghcb(&state);
+	BUG_ON(!irqs_disabled());
+
+	ghcb = __sev_es_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
@@ -517,7 +538,7 @@  void noinstr __sev_es_nmi_complete(void)
 	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
 	VMGEXIT();
 
-	sev_es_put_ghcb(&state);
+	__sev_es_put_ghcb(&state);
 }
 
 static u64 get_jump_table_addr(void)
@@ -527,9 +548,7 @@  static u64 get_jump_table_addr(void)
 	struct ghcb *ghcb;
 	u64 ret = 0;
 
-	local_irq_save(flags);
-
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = sev_es_get_ghcb(&state, &flags);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
@@ -543,9 +562,7 @@  static u64 get_jump_table_addr(void)
 	    ghcb_sw_exit_info_2_is_valid(ghcb))
 		ret = ghcb->save.sw_exit_info_2;
 
-	sev_es_put_ghcb(&state);
-
-	local_irq_restore(flags);
+	sev_es_put_ghcb(&state, flags);
 
 	return ret;
 }
@@ -666,9 +683,10 @@  static bool __init sev_es_setup_ghcb(void)
 static void sev_es_ap_hlt_loop(void)
 {
 	struct ghcb_state state;
+	unsigned long flags;
 	struct ghcb *ghcb;
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = sev_es_get_ghcb(&state, &flags);
 
 	while (true) {
 		vc_ghcb_invalidate(ghcb);
@@ -685,7 +703,7 @@  static void sev_es_ap_hlt_loop(void)
 			break;
 	}
 
-	sev_es_put_ghcb(&state);
+	sev_es_put_ghcb(&state, flags);
 }
 
 /*
@@ -1335,6 +1353,8 @@  DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	enum es_result result;
 	struct ghcb *ghcb;
 
+	BUG_ON(!irqs_disabled());
+
 	/*
 	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 	 */
@@ -1353,7 +1373,7 @@  DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
 	 */
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_es_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
@@ -1361,7 +1381,7 @@  DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	if (result == ES_OK)
 		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
 
-	sev_es_put_ghcb(&state);
+	__sev_es_put_ghcb(&state);
 
 	/* Done - now check the result */
 	switch (result) {