[v3,20/23] kvm: arm64: Intercept host's CPU_SUSPEND PSCI SMCs

Message ID 20201126155421.14901-21-dbrazdil@google.com
Series Opt-in always-on nVHE hypervisor

Commit Message

David Brazdil Nov. 26, 2020, 3:54 p.m. UTC
Add a handler of CPU_SUSPEND host PSCI SMCs. The SMC can either enter
a sleep state indistinguishable from a WFI or a deeper sleep state that
behaves like a CPU_OFF+CPU_ON except that the core is still considered
online when asleep.

The handler saves the host's r0 and pc and makes the same call to EL3 with
the hyp CPU entry point. It either returns to the handler and then
back to the host, or wakes up in the entry point and initializes EL2
state before dropping back to EL1.

A core can only suspend itself but other cores can concurrently invoke
CPU_ON with this core as target. To avoid racing them for the same
boot args struct, CPU_SUSPEND uses a different struct instance and entry
point. Each entry point selects the corresponding struct to restore host
boot args from. This avoids the need for locking in CPU_SUSPEND.

Signed-off-by: David Brazdil <dbrazdil@google.com>
---
 arch/arm64/kvm/hyp/nvhe/hyp-init.S   |  9 +++++
 arch/arm64/kvm/hyp/nvhe/psci-relay.c | 50 +++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)
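For context, the call being intercepted follows the PSCI v0.2+ CPU_SUSPEND
convention: x0 carries the function ID, x1 the power_state, x2 the physical
address to resume at after a power-down state, and x3 a context ID handed
back in r0 on wakeup. The sketch below shows roughly how the host side
issues that SMC; it is simplified from the kernel's PSCI firmware driver,
and host_cpu_suspend() is a hypothetical helper name for illustration only,
with error handling abbreviated:

	#include <linux/arm-smccc.h>
	#include <uapi/linux/psci.h>

	static int host_cpu_suspend(u32 power_state, unsigned long resume_pc_pa)
	{
		struct arm_smccc_res res;

		/* x1 = power_state, x2 = resume PC (PA), x3 = context id (r0 on wake) */
		arm_smccc_smc(PSCI_0_2_FN64_CPU_SUSPEND, power_state, resume_pc_pa,
			      0, 0, 0, 0, 0, &res);

		/* PSCI return code; 0 (SUCCESS) if a shallow state simply returned. */
		return res.a0;
	}

With this series, an SMC like the one above no longer reaches EL3 directly:
the nVHE hypervisor traps it and substitutes its own entry point, as the
patch below implements.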

Comments

Mark Rutland Dec. 1, 2020, 2:57 p.m. UTC | #1
On Thu, Nov 26, 2020 at 03:54:18PM +0000, David Brazdil wrote:
> Add a handler of CPU_SUSPEND host PSCI SMCs. The SMC can either enter
> a sleep state indistinguishable from a WFI or a deeper sleep state that
> behaves like a CPU_OFF+CPU_ON except that the core is still considered
> online when asleep.
> 
> The handler saves the host's r0 and pc and makes the same call to EL3 with
> the hyp CPU entry point. It either returns to the handler and then
> back to the host, or wakes up in the entry point and initializes EL2
> state before dropping back to EL1.

For those CPU_SUSPEND calls which lose context, is there no EL2 state
that you need to save/restore, or is that all saved elsewhere already?

The usual suspects are PMU, debug, and timers, so maybe not. It'd be
nice to have a statement in the commit message if we're certain there's
no state that we need to save.

> A core can only suspend itself but other cores can concurrently invoke
> CPU_ON with this core as target. To avoid racing them for the same
> boot args struct, CPU_SUSPEND uses a different struct instance and entry
> point. Each entry point selects the corresponding struct to restore host
> boot args from. This avoids the need for locking in CPU_SUSPEND.

I found this a bit confusing since the first sentence can be read to
mean that CPU_ON is expected to compose with CPU_SUSPEND, whereas what
this is actually saying is the implementation ensures they don't
interact. How about:

| CPU_ON and CPU_SUSPEND are both implemented using struct cpu_boot_args
| to store the state upon powerup, with each CPU having separate structs
| for CPU_ON and CPU_SUSPEND so that CPU_SUSPEND can operate locklessly
| and so that a CPU_ON call targeting a CPU cannot interfere with a
| concurrent CPU_SUSPEND call on that CPU.

The patch itself looks fine to me.

Thanks,
Mark.
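
For readers skimming the thread, the lockless scheme Mark describes comes
down to each CPU owning two boot-args slots, one per operation. Condensed
from the patch below (not a verbatim excerpt):

	/*
	 * Separate per-CPU slots, so a CPU_ON targeting this core can never
	 * clobber the args the core saved for its own CPU_SUSPEND.
	 */
	static DEFINE_PER_CPU(struct cpu_boot_args, cpu_on_args);      /* serialized by cpu_on_lock */
	static DEFINE_PER_CPU(struct cpu_boot_args, cpu_suspend_args); /* written only by this CPU */

	/*
	 * Each assembly entry point sets is_cpu_on (x28), so the wakeup
	 * code knows which slot to restore the host's pc/r0 from.
	 */
	boot_args = is_cpu_on ? this_cpu_ptr(hyp_symbol_addr(cpu_on_args))
			      : this_cpu_ptr(hyp_symbol_addr(cpu_suspend_args));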

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index ea71f653af55..fbb195851fb9 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -162,7 +162,15 @@  alternative_else_nop_endif
 	ret
 SYM_CODE_END(___kvm_hyp_init)
 
+SYM_CODE_START(__kvm_hyp_cpu_suspend_entry)
+	mov	x28, #0				// is_cpu_on = false
+	b	__kvm_hyp_cpu_common_entry
+SYM_CODE_END(__kvm_hyp_cpu_suspend_entry)
+
 SYM_CODE_START(__kvm_hyp_cpu_on_entry)
+	mov	x28, #1				// is_cpu_on = true
+
+SYM_INNER_LABEL(__kvm_hyp_cpu_common_entry, SYM_L_LOCAL)
 	msr	SPsel, #1			// We want to use SP_EL{1,2}
 
 	/* Check that the core was booted in EL2. */
@@ -188,6 +196,7 @@  SYM_CODE_START(__kvm_hyp_cpu_on_entry)
 	kimg_hyp_va x1, x2
 
 	/* Leave idmap. */
+	mov	x0, x28
 	br	x1
 SYM_CODE_END(__kvm_hyp_cpu_on_entry)
 
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 39e507672e6e..592c11e9851c 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -17,6 +17,7 @@ 
 #include <nvhe/trap_handler.h>
 
 extern char __kvm_hyp_cpu_on_entry[];
+extern char __kvm_hyp_cpu_suspend_entry[];
 
 void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
 
@@ -39,6 +40,7 @@  struct cpu_boot_args {
 
 static DEFINE_PER_CPU(atomic_t, cpu_on_lock) = ATOMIC_INIT(0);
 static DEFINE_PER_CPU(struct cpu_boot_args, cpu_on_args);
+static DEFINE_PER_CPU(struct cpu_boot_args, cpu_suspend_args);
 
 static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
 {
@@ -112,6 +114,34 @@  static unsigned int find_cpu_id(u64 mpidr)
 	return INVALID_CPU_ID;
 }
 
+static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(u64, power_state, host_ctxt, 1);
+	DECLARE_REG(unsigned long, pc, host_ctxt, 2);
+	DECLARE_REG(unsigned long, r0, host_ctxt, 3);
+
+	struct cpu_boot_args *boot_args;
+	struct kvm_nvhe_init_params *init_params;
+
+	boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_suspend_args));
+	init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+
+	/*
+	 * No need to acquire a lock before writing to boot_args because a core
+	 * can only suspend itself and the racy CPU_ON uses a separate struct.
+	 */
+
+	*boot_args = (struct cpu_boot_args){ .pc = pc, .r0 = r0 };
+
+	/*
+	 * Will either return if shallow sleep state, or wake up into the entry
+	 * point if it is a deep sleep state.
+	 */
+	return psci_call(func_id, power_state,
+			 __hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_suspend_entry)),
+			 __hyp_pa(init_params));
+}
+
 static __always_inline bool try_acquire_cpu_on_lock(atomic_t *l) {
 	return atomic_cmpxchg_acquire(l, CPU_UNLOCKED, CPU_LOCKED) == CPU_UNLOCKED;
 }
@@ -165,27 +195,35 @@  static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
 	return ret;
 }
 
-asmlinkage void __noreturn __kvm_hyp_psci_cpu_entry(void)
+asmlinkage void __noreturn __kvm_hyp_psci_cpu_entry(bool is_cpu_on)
 {
 	atomic_t *lock;
 	struct cpu_boot_args *boot_args;
 	struct kvm_cpu_context *host_ctxt;
 
-	lock = this_cpu_ptr(hyp_symbol_addr(cpu_on_lock));
-	boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+	if (is_cpu_on)
+		boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+	else
+		boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_suspend_args));
+
 	host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
 
 	cpu_reg(host_ctxt, 0) = boot_args->r0;
 	write_sysreg_el2(boot_args->pc, SYS_ELR);
 
-	release_cpu_on_lock(lock);
+	if (is_cpu_on) {
+		lock = this_cpu_ptr(hyp_symbol_addr(cpu_on_lock));
+		release_cpu_on_lock(lock);
+	}
 
 	__host_enter(host_ctxt);
 }
 
 static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
 {
-	if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_OFF])
+	if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_SUSPEND])
+		return psci_cpu_suspend(func_id, host_ctxt);
+	else if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_OFF])
 		return psci_forward(host_ctxt);
 	else if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_ON])
 		return psci_cpu_on(func_id, host_ctxt);
@@ -209,6 +247,8 @@  static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_
 	case PSCI_0_2_FN_SYSTEM_RESET:
 		psci_forward_noreturn(host_ctxt);
 		unreachable();
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		return psci_cpu_suspend(func_id, host_ctxt);
 	case PSCI_0_2_FN64_CPU_ON:
 		return psci_cpu_on(func_id, host_ctxt);
 	default:
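
Putting the pieces together, the deep-sleep round trip implemented here is
roughly as follows (an editorial summary of the patch, not part of the
submission):

	/*
	 * 1. Host issues CPU_SUSPEND -> trapped at EL2, psci_cpu_suspend().
	 * 2. Hyp stashes the host's {pc, r0} in this CPU's cpu_suspend_args.
	 * 3. Hyp re-issues CPU_SUSPEND to EL3, substituting
	 *    __kvm_hyp_cpu_suspend_entry as the entry point and its own
	 *    kvm_init_params as the context argument.
	 * 4a. Shallow sleep: EL3 returns into psci_cpu_suspend(), which
	 *     hands the PSCI return value straight back to the host.
	 * 4b. Deep sleep: the core wakes in __kvm_hyp_cpu_suspend_entry with
	 *     EL2 state lost; x28 = 0 flags "not CPU_ON", EL2 is
	 *     re-initialized, then __kvm_hyp_psci_cpu_entry(false) restores
	 *     the host's pc/r0 from cpu_suspend_args and returns to EL1 via
	 *     __host_enter().
	 */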