diff mbox

[v12,04/16] arm64: kvm: allows kvm cpu hotplug

Message ID 23ca498d5e28017549c6076812d60b18e86fa20e.1448403503.git.geoff@infradead.org (mailing list archive)
State New, archived
Headers show

Commit Message

Geoff Levand Nov. 24, 2015, 10:25 p.m. UTC
From: AKASHI Takahiro <takahiro.akashi@linaro.org>

The current kvm implementation on arm64 does cpu-specific initialization
at system boot, and has no way to gracefully shutdown a core in terms of
kvm. This prevents, especially, kexec from rebooting the system on a boot
core in EL2.

This patch adds a cpu tear-down function and also puts an existing cpu-init
code into a separate function, kvm_arch_hardware_disable() and
kvm_arch_hardware_enable() respectively.
We don't need arm64-specific cpu hotplug hook any more.

Since this patch modifies common part of code between arm and arm64, one
stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
compiling errors.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   | 10 ++++-
 arch/arm/include/asm/kvm_mmu.h    |  1 +
 arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
 arch/arm/kvm/mmu.c                |  5 +++
 arch/arm64/include/asm/kvm_host.h | 16 +++++++-
 arch/arm64/include/asm/kvm_mmu.h  |  1 +
 arch/arm64/include/asm/virt.h     |  9 +++++
 arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
 arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
 9 files changed, 138 insertions(+), 48 deletions(-)

Comments

Marc Zyngier Nov. 27, 2015, 1:54 p.m. UTC | #1
On 24/11/15 22:25, Geoff Levand wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> The current kvm implementation on arm64 does cpu-specific initialization
> at system boot, and has no way to gracefully shutdown a core in terms of
> kvm. This prevents, especially, kexec from rebooting the system on a boot
> core in EL2.
> 
> This patch adds a cpu tear-down function and also puts an existing cpu-init
> code into a separate function, kvm_arch_hardware_disable() and
> kvm_arch_hardware_enable() respectively.
> We don't need arm64-specific cpu hotplug hook any more.
> 
> Since this patch modifies common part of code between arm and arm64, one
> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
> compiling errors.
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  arch/arm/include/asm/kvm_host.h   | 10 ++++-
>  arch/arm/include/asm/kvm_mmu.h    |  1 +
>  arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>  arch/arm/kvm/mmu.c                |  5 +++
>  arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>  arch/arm64/include/asm/kvm_mmu.h  |  1 +
>  arch/arm64/include/asm/virt.h     |  9 +++++
>  arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>  arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>  9 files changed, 138 insertions(+), 48 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 6692982..9242765 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -214,6 +214,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
>  	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
>  }
>  
> +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
> +					phys_addr_t phys_idmap_start)
> +{
> +	/*
> +	 * TODO
> +	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
> +	 */
> +}
> +
>  static inline int kvm_arch_dev_ioctl_check_extension(long ext)
>  {
>  	return 0;
> @@ -226,7 +235,6 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>  
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>  
> -static inline void kvm_arch_hardware_disable(void) {}
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 405aa18..dc6fadf 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -66,6 +66,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
>  phys_addr_t kvm_mmu_get_httbr(void);
>  phys_addr_t kvm_mmu_get_boot_httbr(void);
>  phys_addr_t kvm_get_idmap_vector(void);
> +phys_addr_t kvm_get_idmap_start(void);
>  int kvm_mmu_init(void);
>  void kvm_clear_hyp_idmap(void);
>  
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index eab83b2..a5d9d74 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -16,7 +16,6 @@
>   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>   */
>  
> -#include <linux/cpu.h>
>  #include <linux/cpu_pm.h>
>  #include <linux/errno.h>
>  #include <linux/err.h>
> @@ -61,6 +60,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
>  static u8 kvm_next_vmid;
>  static DEFINE_SPINLOCK(kvm_vmid_lock);
>  
> +static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
> +
>  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	BUG_ON(preemptible());
> @@ -85,11 +86,6 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
>  	return &kvm_arm_running_vcpu;
>  }
>  
> -int kvm_arch_hardware_enable(void)
> -{
> -	return 0;
> -}
> -
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
>  	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
> @@ -959,7 +955,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  	}
>  }
>  
> -static void cpu_init_hyp_mode(void *dummy)
> +int kvm_arch_hardware_enable(void)
>  {
>  	phys_addr_t boot_pgd_ptr;
>  	phys_addr_t pgd_ptr;
> @@ -967,6 +963,9 @@ static void cpu_init_hyp_mode(void *dummy)
>  	unsigned long stack_page;
>  	unsigned long vector_ptr;
>  
> +	if (__hyp_get_vectors() != hyp_default_vectors)
> +		return 0;
> +
>  	/* Switch from the HYP stub to our own HYP init vector */
>  	__hyp_set_vectors(kvm_get_idmap_vector());
>  
> @@ -979,38 +978,50 @@ static void cpu_init_hyp_mode(void *dummy)
>  	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
>  
>  	kvm_arm_init_debug();
> +
> +	return 0;
>  }
>  
> -static int hyp_init_cpu_notify(struct notifier_block *self,
> -			       unsigned long action, void *cpu)
> +void kvm_arch_hardware_disable(void)
>  {
> -	switch (action) {
> -	case CPU_STARTING:
> -	case CPU_STARTING_FROZEN:
> -		if (__hyp_get_vectors() == hyp_default_vectors)
> -			cpu_init_hyp_mode(NULL);
> -		break;
> -	}
> +	phys_addr_t boot_pgd_ptr;
> +	phys_addr_t phys_idmap_start;
>  
> -	return NOTIFY_OK;
> -}
> +	if (__hyp_get_vectors() == hyp_default_vectors)
> +		return;
>  
> -static struct notifier_block hyp_init_cpu_nb = {
> -	.notifier_call = hyp_init_cpu_notify,
> -};
> +	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
> +	phys_idmap_start = kvm_get_idmap_start();
> +
> +	__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
> +}
>  
>  #ifdef CONFIG_CPU_PM
>  static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
>  				    unsigned long cmd,
>  				    void *v)
>  {
> -	if (cmd == CPU_PM_EXIT &&
> -	    __hyp_get_vectors() == hyp_default_vectors) {
> -		cpu_init_hyp_mode(NULL);
> +	switch (cmd) {
> +	case CPU_PM_ENTER:
> +		if (__hyp_get_vectors() != hyp_default_vectors)
> +			__this_cpu_write(kvm_arm_hardware_enabled, 1);
> +		else
> +			__this_cpu_write(kvm_arm_hardware_enabled, 0);
> +		/*
> +		 * don't call kvm_arch_hardware_disable() in case of
> +		 * CPU_PM_ENTER because it does't actually save any state.
> +		 */
> +
> +		return NOTIFY_OK;
> +	case CPU_PM_EXIT:
> +		if (__this_cpu_read(kvm_arm_hardware_enabled))
> +			kvm_arch_hardware_enable();
> +
>  		return NOTIFY_OK;
> -	}
>  
> -	return NOTIFY_DONE;
> +	default:
> +		return NOTIFY_DONE;
> +	}
>  }
>  
>  static struct notifier_block hyp_init_cpu_pm_nb = {
> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>  	}
>  
>  	/*
> -	 * Execute the init code on each CPU.
> -	 */
> -	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
> -
> -	/*
>  	 * Init HYP view of VGIC
>  	 */
>  	err = kvm_vgic_hyp_init();
> @@ -1186,26 +1192,15 @@ int kvm_arch_init(void *opaque)
>  		}
>  	}
>  
> -	cpu_notifier_register_begin();
> -
>  	err = init_hyp_mode();
>  	if (err)
>  		goto out_err;
>  
> -	err = __register_cpu_notifier(&hyp_init_cpu_nb);
> -	if (err) {
> -		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
> -		goto out_err;
> -	}
> -
> -	cpu_notifier_register_done();
> -
>  	hyp_cpu_pm_init();
>  
>  	kvm_coproc_table_init();
>  	return 0;
>  out_err:
> -	cpu_notifier_register_done();
>  	return err;
>  }
>  
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 6984342..69b4a33 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -1644,6 +1644,11 @@ phys_addr_t kvm_get_idmap_vector(void)
>  	return hyp_idmap_vector;
>  }
>  
> +phys_addr_t kvm_get_idmap_start(void)
> +{
> +	return hyp_idmap_start;
> +}
> +
>  int kvm_mmu_init(void)
>  {
>  	int err;
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index a35ce72..0b540f8 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -223,6 +223,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
>  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
>  
>  u64 kvm_call_hyp(void *hypfn, ...);
> +void kvm_call_reset(phys_addr_t boot_pgd_ptr, phys_addr_t phys_idmap_start);
>  void force_vm_exit(const cpumask_t *mask);
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>  
> @@ -247,7 +248,20 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
>  		     hyp_stack_ptr, vector_ptr);
>  }
>  
> -static inline void kvm_arch_hardware_disable(void) {}
> +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
> +					phys_addr_t phys_idmap_start)
> +{
> +	/*
> +	 * Call reset code, and switch back to stub hyp vectors.
> +	 */
> +	kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
> +}
> +
> +struct vgic_sr_vectors {
> +	void	*save_vgic;
> +	void	*restore_vgic;
> +};
> +

Why is this structure being re-introduced? It has been removed in 48f8bd5.

>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 6150567..ff5a087 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -98,6 +98,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
>  phys_addr_t kvm_mmu_get_httbr(void);
>  phys_addr_t kvm_mmu_get_boot_httbr(void);
>  phys_addr_t kvm_get_idmap_vector(void);
> +phys_addr_t kvm_get_idmap_start(void);
>  int kvm_mmu_init(void);
>  void kvm_clear_hyp_idmap(void);
>  
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 3070096..bca79f9 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -58,9 +58,18 @@
>  
>  #define HVC_CALL_FUNC 3
>  
> +/*
> + * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
> + *
> + * @x0: entry address in trampoline code in va
> + * @x1: identical mapping page table in pa
> + */
> +
>  #define BOOT_CPU_MODE_EL1	(0xe11)
>  #define BOOT_CPU_MODE_EL2	(0xe12)
>  
> +#define HVC_RESET_CPU 4
> +

Why isn't this next to the comment that document it?

>  #ifndef __ASSEMBLY__
>  
>  /*
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index 2e67a48..1925163 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -139,6 +139,39 @@ merged:
>  	eret
>  ENDPROC(__kvm_hyp_init)
>  
> +	/*
> +	 * x0: HYP boot pgd
> +	 * x1: HYP phys_idmap_start
> +	 */
> +ENTRY(__kvm_hyp_reset)
> +	/* We're in trampoline code in VA, switch back to boot page tables */
> +	msr	ttbr0_el2, x0
> +	isb
> +
> +	/* Invalidate the old TLBs */
> +	tlbi	alle2
> +	dsb	sy

I find the location of that TLBI a bit weird. If you want to do
something more useful, move it to after the MMU has been disabled.

> +
> +	/* Branch into PA space */
> +	adr	x0, 1f
> +	bfi	x1, x0, #0, #PAGE_SHIFT
> +	br	x1
> +
> +	/* We're now in idmap, disable MMU */
> +1:	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_EL2_FLAGS
> +	bic	x0, x0, x1		// Clear SCTL_M and etc
> +	msr	sctlr_el2, x0
> +	isb
> +
> +	/* Install stub vectors */
> +	adrp	x0, __hyp_stub_vectors
> +	add	x0, x0, #:lo12:__hyp_stub_vectors
> +	msr	vbar_el2, x0
> +
> +	eret
> +ENDPROC(__kvm_hyp_reset)
> +
>  	.ltorg
>  
>  	.popsection
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 1bef8db..aca11d6 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -939,6 +939,11 @@ ENTRY(kvm_call_hyp)
>  	ret
>  ENDPROC(kvm_call_hyp)
>  
> +ENTRY(kvm_call_reset)
> +	hvc	#HVC_RESET_CPU
> +	ret
> +ENDPROC(kvm_call_reset)
> +
>  .macro invalid_vector	label, target
>  	.align	2
>  \label:
> @@ -982,10 +987,27 @@ el1_sync:					// Guest trapped into EL2
>  	cmp	x18, #HVC_GET_VECTORS
>  	b.ne	1f
>  	mrs	x0, vbar_el2
> -	b	2f
> -
> -1:	/* Default to HVC_CALL_HYP. */
> +	b	do_eret
>  
> +	/* jump into trampoline code */
> +1:	cmp	x18, #HVC_RESET_CPU
> +	b.ne	2f
> +	/*
> +	 * Entry point is:
> +	 *	TRAMPOLINE_VA
> +	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
> +	 */
> +	adrp	x2, __kvm_hyp_reset
> +	add	x2, x2, #:lo12:__kvm_hyp_reset
> +	adrp	x3, __hyp_idmap_text_start
> +	add	x3, x3, #:lo12:__hyp_idmap_text_start
> +	and	x3, x3, PAGE_MASK
> +	sub	x2, x2, x3
> +	ldr	x3, =TRAMPOLINE_VA

Nit: You should be able to do a "mov x3, #TRAMPOLINE_VA and avoid the
load from memory.

> +	add	x2, x2, x3
> +	br	x2				// no return
> +
> +2:	/* Default to HVC_CALL_HYP. */
>  	push	lr, xzr
>  
>  	/*
> @@ -999,7 +1021,9 @@ el1_sync:					// Guest trapped into EL2
>  	blr	lr
>  
>  	pop	lr, xzr
> -2:	eret
> +
> +do_eret:
> +	eret
>  
>  el1_trap:
>  	/*
> 

Thanks,

	M.
Ashwin Chaugule Dec. 2, 2015, 10:40 p.m. UTC | #2
Hello,

On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>
> The current kvm implementation on arm64 does cpu-specific initialization
> at system boot, and has no way to gracefully shutdown a core in terms of
> kvm. This prevents, especially, kexec from rebooting the system on a boot
> core in EL2.
>
> This patch adds a cpu tear-down function and also puts an existing cpu-init
> code into a separate function, kvm_arch_hardware_disable() and
> kvm_arch_hardware_enable() respectively.
> We don't need arm64-specific cpu hotplug hook any more.
>
> Since this patch modifies common part of code between arm and arm64, one
> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
> compiling errors.
>
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>  arch/arm/include/asm/kvm_host.h   | 10 ++++-
>  arch/arm/include/asm/kvm_mmu.h    |  1 +
>  arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>  arch/arm/kvm/mmu.c                |  5 +++
>  arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>  arch/arm64/include/asm/kvm_mmu.h  |  1 +
>  arch/arm64/include/asm/virt.h     |  9 +++++
>  arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>  arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>  9 files changed, 138 insertions(+), 48 deletions(-)

[..]

>
>
>  static struct notifier_block hyp_init_cpu_pm_nb = {
> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>         }
>
>         /*
> -        * Execute the init code on each CPU.
> -        */
> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
> -
> -       /*
>          * Init HYP view of VGIC
>          */
>         err = kvm_vgic_hyp_init();

With this flow, the cpu_init_hyp_mode() is called only at VM guest
creation, but vgic_hyp_init() is called at bootup. On a system with
GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
(to get the number of LRs), because we're not reading it from EL2
anymore.

Whats the best way to fix this?
- Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable later?
- Fold the VGIC init stuff back into hardware_enable()?
- Read the VGIC number of LRs from the hyp stub?
- ..

Regards,
Ashwin.
Ashwin Chaugule Dec. 3, 2015, 1:55 p.m. UTC | #3
On 2 December 2015 at 17:40, Ashwin Chaugule <ashwin.chaugule@linaro.org> wrote:
> Hello,
>
> On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> The current kvm implementation on arm64 does cpu-specific initialization
>> at system boot, and has no way to gracefully shutdown a core in terms of
>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>> core in EL2.
>>
>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>> code into a separate function, kvm_arch_hardware_disable() and
>> kvm_arch_hardware_enable() respectively.
>> We don't need arm64-specific cpu hotplug hook any more.
>>
>> Since this patch modifies common part of code between arm and arm64, one
>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>> compiling errors.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>  arch/arm/include/asm/kvm_host.h   | 10 ++++-
>>  arch/arm/include/asm/kvm_mmu.h    |  1 +
>>  arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>>  arch/arm/kvm/mmu.c                |  5 +++
>>  arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>>  arch/arm64/include/asm/kvm_mmu.h  |  1 +
>>  arch/arm64/include/asm/virt.h     |  9 +++++
>>  arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>>  arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>>  9 files changed, 138 insertions(+), 48 deletions(-)
>
> [..]
>
>>
>>
>>  static struct notifier_block hyp_init_cpu_pm_nb = {
>> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>>         }
>>
>>         /*
>> -        * Execute the init code on each CPU.
>> -        */
>> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
>> -
>> -       /*
>>          * Init HYP view of VGIC
>>          */
>>         err = kvm_vgic_hyp_init();
>
> With this flow, the cpu_init_hyp_mode() is called only at VM guest
> creation, but vgic_hyp_init() is called at bootup. On a system with
> GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
> (to get the number of LRs), because we're not reading it from EL2
> anymore.

..or more likely, that the function to read the ICH_VTR_EL2 is not
mapped in EL2, because we deferred the cpu_init_hyp_mode() calls. In
any case, this ends up breaking KVM and manifests as a guest console
which is stuck.

>
> Whats the best way to fix this?
> - Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable later?
> - Fold the VGIC init stuff back into hardware_enable()?
> - Read the VGIC number of LRs from the hyp stub?
> - ..
>
> Regards,
> Ashwin.
Marc Zyngier Dec. 3, 2015, 1:58 p.m. UTC | #4
On 02/12/15 22:40, Ashwin Chaugule wrote:
> Hello,
> 
> On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>
>> The current kvm implementation on arm64 does cpu-specific initialization
>> at system boot, and has no way to gracefully shutdown a core in terms of
>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>> core in EL2.
>>
>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>> code into a separate function, kvm_arch_hardware_disable() and
>> kvm_arch_hardware_enable() respectively.
>> We don't need arm64-specific cpu hotplug hook any more.
>>
>> Since this patch modifies common part of code between arm and arm64, one
>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>> compiling errors.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>  arch/arm/include/asm/kvm_host.h   | 10 ++++-
>>  arch/arm/include/asm/kvm_mmu.h    |  1 +
>>  arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>>  arch/arm/kvm/mmu.c                |  5 +++
>>  arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>>  arch/arm64/include/asm/kvm_mmu.h  |  1 +
>>  arch/arm64/include/asm/virt.h     |  9 +++++
>>  arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>>  arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>>  9 files changed, 138 insertions(+), 48 deletions(-)
> 
> [..]
> 
>>
>>
>>  static struct notifier_block hyp_init_cpu_pm_nb = {
>> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>>         }
>>
>>         /*
>> -        * Execute the init code on each CPU.
>> -        */
>> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
>> -
>> -       /*
>>          * Init HYP view of VGIC
>>          */
>>         err = kvm_vgic_hyp_init();
> 
> With this flow, the cpu_init_hyp_mode() is called only at VM guest
> creation, but vgic_hyp_init() is called at bootup. On a system with
> GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
> (to get the number of LRs), because we're not reading it from EL2
> anymore.

Indeed, this is completely broken (I just reproduced the issue on a
model). I wish this kind of details had been checked earlier, but thanks
for pointing it out.

> Whats the best way to fix this?
> - Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable later?
> - Fold the VGIC init stuff back into hardware_enable()?

None of that works - kvm_arch_hardware_enable() is called once per CPU,
while vgic_hyp_init() can only be called once. Also,
kvm_arch_hardware_enable() is called from interrupt context, and I
wouldn't feel comfortable starting probing DT and allocating stuff from
there.

> - Read the VGIC number of LRs from the hyp stub?

That's may UNDEF if called in the wrong context. Also, this defeats the
point of stubs, which is just to install the vectors for the hypervisor.

> - ..

Yeah, quite.

Geoff, Takahiro?

	M.
Geoff Levand Dec. 10, 2015, 6:31 p.m. UTC | #5
Hi All,

On Thu, 2015-12-03 at 13:58 +0000, Marc Zyngier wrote:
> Indeed, this is completely broken (I just reproduced the issue on a
> model).

There are users out there that want kexec/kdump support.  I think we
should remove this patch from the series, replace it with a temporary
Kconfig conditional that allows only one of KVM or kexec to be enabled,
and merge kexec and kdump support in for 4.5.  This will satisfy users
who need kexec but not KVM, like kexec based bootloaders.  Once this
KVM hot plug patch is fixed we then merge it, either for 4.5 or 4.6,
depending on the timing.

I'll prepare and post a v13 series that does the above.

-Geoff
Yang Shi Dec. 10, 2015, 6:44 p.m. UTC | #6
On 12/3/2015 5:58 AM, Marc Zyngier wrote:
> On 02/12/15 22:40, Ashwin Chaugule wrote:
>> Hello,
>>
>> On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>
>>> The current kvm implementation on arm64 does cpu-specific initialization
>>> at system boot, and has no way to gracefully shutdown a core in terms of
>>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>>> core in EL2.
>>>
>>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>>> code into a separate function, kvm_arch_hardware_disable() and
>>> kvm_arch_hardware_enable() respectively.
>>> We don't need arm64-specific cpu hotplug hook any more.
>>>
>>> Since this patch modifies common part of code between arm and arm64, one
>>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>>> compiling errors.
>>>
>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>> ---
>>>   arch/arm/include/asm/kvm_host.h   | 10 ++++-
>>>   arch/arm/include/asm/kvm_mmu.h    |  1 +
>>>   arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>>>   arch/arm/kvm/mmu.c                |  5 +++
>>>   arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>>>   arch/arm64/include/asm/kvm_mmu.h  |  1 +
>>>   arch/arm64/include/asm/virt.h     |  9 +++++
>>>   arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>>>   arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>>>   9 files changed, 138 insertions(+), 48 deletions(-)
>>
>> [..]
>>
>>>
>>>
>>>   static struct notifier_block hyp_init_cpu_pm_nb = {
>>> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>>>          }
>>>
>>>          /*
>>> -        * Execute the init code on each CPU.
>>> -        */
>>> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
>>> -
>>> -       /*
>>>           * Init HYP view of VGIC
>>>           */
>>>          err = kvm_vgic_hyp_init();
>>
>> With this flow, the cpu_init_hyp_mode() is called only at VM guest
>> creation, but vgic_hyp_init() is called at bootup. On a system with
>> GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
>> (to get the number of LRs), because we're not reading it from EL2
>> anymore.
>
> Indeed, this is completely broken (I just reproduced the issue on a
> model). I wish this kind of details had been checked earlier, but thanks
> for pointing it out.

Sorry for chiming in late. I did run into something similar when I 
tested some earlier version patches with Akashi San.

The guest bootup just hangs with 4.1 kernel on my LS2085 board which has 
GICv3. Not sure if this is the same issue. But, my bisect did point to 
this patch.

I used to think it is my kernel's problem (4.1 sounds a little bit old) 
since Akashi San can't reproduce this on his Hikey board.

Thanks,
Yang

>
>> Whats the best way to fix this?
>> - Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable later?
>> - Fold the VGIC init stuff back into hardware_enable()?
>
> None of that works - kvm_arch_hardware_enable() is called once per CPU,
> while vgic_hyp_init() can only be called once. Also,
> kvm_arch_hardware_enable() is called from interrupt context, and I
> wouldn't feel comfortable starting probing DT and allocating stuff from
> there.
>
>> - Read the VGIC number of LRs from the hyp stub?
>
> That's may UNDEF if called in the wrong context. Also, this defeats the
> point of stubs, which is just to install the vectors for the hypervisor.
>
>> - ..
>
> Yeah, quite.
>
> Geoff, Takahiro?
>
> 	M.
>
AKASHI Takahiro Dec. 11, 2015, 8:09 a.m. UTC | #7
Yang,

On 12/11/2015 03:44 AM, Shi, Yang wrote:
> On 12/3/2015 5:58 AM, Marc Zyngier wrote:
>> On 02/12/15 22:40, Ashwin Chaugule wrote:
>>> Hello,
>>>
>>> On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>
>>>> The current kvm implementation on arm64 does cpu-specific initialization
>>>> at system boot, and has no way to gracefully shutdown a core in terms of
>>>> kvm. This prevents, especially, kexec from rebooting the system on a boot
>>>> core in EL2.
>>>>
>>>> This patch adds a cpu tear-down function and also puts an existing cpu-init
>>>> code into a separate function, kvm_arch_hardware_disable() and
>>>> kvm_arch_hardware_enable() respectively.
>>>> We don't need arm64-specific cpu hotplug hook any more.
>>>>
>>>> Since this patch modifies common part of code between arm and arm64, one
>>>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>>>> compiling errors.
>>>>
>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>> ---
>>>>   arch/arm/include/asm/kvm_host.h   | 10 ++++-
>>>>   arch/arm/include/asm/kvm_mmu.h    |  1 +
>>>>   arch/arm/kvm/arm.c                | 79 ++++++++++++++++++---------------------
>>>>   arch/arm/kvm/mmu.c                |  5 +++
>>>>   arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>>>>   arch/arm64/include/asm/kvm_mmu.h  |  1 +
>>>>   arch/arm64/include/asm/virt.h     |  9 +++++
>>>>   arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>>>>   arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>>>>   9 files changed, 138 insertions(+), 48 deletions(-)
>>>
>>> [..]
>>>
>>>>
>>>>
>>>>   static struct notifier_block hyp_init_cpu_pm_nb = {
>>>> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>>>>          }
>>>>
>>>>          /*
>>>> -        * Execute the init code on each CPU.
>>>> -        */
>>>> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
>>>> -
>>>> -       /*
>>>>           * Init HYP view of VGIC
>>>>           */
>>>>          err = kvm_vgic_hyp_init();
>>>
>>> With this flow, the cpu_init_hyp_mode() is called only at VM guest
>>> creation, but vgic_hyp_init() is called at bootup. On a system with
>>> GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
>>> (to get the number of LRs), because we're not reading it from EL2
>>> anymore.
>>
>> Indeed, this is completely broken (I just reproduced the issue on a
>> model). I wish this kind of details had been checked earlier, but thanks
>> for pointing it out.
>
> Sorry for chiming in late. I did run into something similar when I tested some earlier version patches with Akashi San.
>
> The guest bootup just hangs with 4.1 kernel on my LS2085 board which has GICv3. Not sure if this is the same issue. But,
> my bisect did point to this patch.

Can you please try my fixup! patch to determine whether it is the same issue or not?

Thanks
-Takahiro AKASHI


> I used to think it is my kernel's problem (4.1 sounds a little bit old) since Akashi San can't reproduce this on his
> Hikey board.
>
> Thanks,
> Yang
>
>>
>>> Whats the best way to fix this?
>>> - Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable later?
>>> - Fold the VGIC init stuff back into hardware_enable()?
>>
>> None of that works - kvm_arch_hardware_enable() is called once per CPU,
>> while vgic_hyp_init() can only be called once. Also,
>> kvm_arch_hardware_enable() is called from interrupt context, and I
>> wouldn't feel comfortable starting probing DT and allocating stuff from
>> there.
>>
>>> - Read the VGIC number of LRs from the hyp stub?
>>
>> That's may UNDEF if called in the wrong context. Also, this defeats the
>> point of stubs, which is just to install the vectors for the hypervisor.
>>
>>> - ..
>>
>> Yeah, quite.
>>
>> Geoff, Takahiro?
>>
>>     M.
>>
>
Will Deacon Dec. 11, 2015, 4:31 p.m. UTC | #8
On Thu, Dec 10, 2015 at 10:31:48AM -0800, Geoff Levand wrote:
> On Thu, 2015-12-03 at 13:58 +0000, Marc Zyngier wrote:
> > Indeed, this is completely broken (I just reproduced the issue on a
> > model).
> 
> There are users out there that want kexec/kdump support.  I think we
> should remove this patch from the series, replace it with a temporary
> Kconfig conditional that allows only one of KVM or kexec to be enabled,
> and merge kexec and kdump support in for 4.5.  This will satisfy users
> who need kexec but not KVM, like kexec based bootloaders.  Once this
> KVM hot plug patch is fixed we then merge it, either for 4.5 or 4.6,
> depending on the timing.
> 
> I'll prepare and post a v13 series that does the above.

I'm not really keen on merging this based on a "temporary" Kconfig change
with a promise to have it resolved in the future. Given how long this
series has been floating around already, I'm not filled with confidence
at the prospect of waiting for a fixup patch after it's been merged.

Please address the outstanding review comments before reposting.

Will
Yang Shi Dec. 14, 2015, 6 p.m. UTC | #9
On 12/11/2015 12:09 AM, AKASHI Takahiro wrote:
> Yang,
>
> On 12/11/2015 03:44 AM, Shi, Yang wrote:
>> On 12/3/2015 5:58 AM, Marc Zyngier wrote:
>>> On 02/12/15 22:40, Ashwin Chaugule wrote:
>>>> Hello,
>>>>
>>>> On 24 November 2015 at 17:25, Geoff Levand <geoff@infradead.org> wrote:
>>>>> From: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>>
>>>>> The current kvm implementation on arm64 does cpu-specific
>>>>> initialization
>>>>> at system boot, and has no way to gracefully shutdown a core in
>>>>> terms of
>>>>> kvm. This prevents, especially, kexec from rebooting the system on
>>>>> a boot
>>>>> core in EL2.
>>>>>
>>>>> This patch adds a cpu tear-down function and also puts an existing
>>>>> cpu-init
>>>>> code into a separate function, kvm_arch_hardware_disable() and
>>>>> kvm_arch_hardware_enable() respectively.
>>>>> We don't need arm64-specific cpu hotplug hook any more.
>>>>>
>>>>> Since this patch modifies common part of code between arm and
>>>>> arm64, one
>>>>> stub definition, __cpu_reset_hyp_mode(), is added on arm side to avoid
>>>>> compiling errors.
>>>>>
>>>>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>>>>> ---
>>>>>   arch/arm/include/asm/kvm_host.h   | 10 ++++-
>>>>>   arch/arm/include/asm/kvm_mmu.h    |  1 +
>>>>>   arch/arm/kvm/arm.c                | 79
>>>>> ++++++++++++++++++---------------------
>>>>>   arch/arm/kvm/mmu.c                |  5 +++
>>>>>   arch/arm64/include/asm/kvm_host.h | 16 +++++++-
>>>>>   arch/arm64/include/asm/kvm_mmu.h  |  1 +
>>>>>   arch/arm64/include/asm/virt.h     |  9 +++++
>>>>>   arch/arm64/kvm/hyp-init.S         | 33 ++++++++++++++++
>>>>>   arch/arm64/kvm/hyp.S              | 32 ++++++++++++++--
>>>>>   9 files changed, 138 insertions(+), 48 deletions(-)
>>>>
>>>> [..]
>>>>
>>>>>
>>>>>
>>>>>   static struct notifier_block hyp_init_cpu_pm_nb = {
>>>>> @@ -1108,11 +1119,6 @@ static int init_hyp_mode(void)
>>>>>          }
>>>>>
>>>>>          /*
>>>>> -        * Execute the init code on each CPU.
>>>>> -        */
>>>>> -       on_each_cpu(cpu_init_hyp_mode, NULL, 1);
>>>>> -
>>>>> -       /*
>>>>>           * Init HYP view of VGIC
>>>>>           */
>>>>>          err = kvm_vgic_hyp_init();
>>>>
>>>> With this flow, the cpu_init_hyp_mode() is called only at VM guest
>>>> creation, but vgic_hyp_init() is called at bootup. On a system with
>>>> GICv3, it looks like we end up with bogus values from the ICH_VTR_EL2
>>>> (to get the number of LRs), because we're not reading it from EL2
>>>> anymore.
>>>
>>> Indeed, this is completely broken (I just reproduced the issue on a
>>> model). I wish this kind of details had been checked earlier, but thanks
>>> for pointing it out.
>>
>> Sorry for chiming in late. I did run into something similar when I
>> tested some earlier version patches with Akashi San.
>>
>> The guest bootup just hangs with 4.1 kernel on my LS2085 board which
>> has GICv3. Not sure if this is the same issue. But,
>> my bisect did point to this patch.
>
> Can you please try my fixup! patch to determine whether it is the same
> issue or not?

I wish I could help this time, however, unfortunately, my LS2085 board 
had some hardware failure so I lost the access to it. Once I get a 
replacement or have it fixed, I will have a try.

Regards,
Yang

>
> Thanks
> -Takahiro AKASHI
>
>
>> I used to think it is my kernel's problem (4.1 sounds a little bit
>> old) since Akashi San can't reproduce this on his
>> Hikey board.
>>
>> Thanks,
>> Yang
>>
>>>
>>>> Whats the best way to fix this?
>>>> - Call kvm_arch_hardware_enable() before vgic_hyp_init() and disable
>>>> later?
>>>> - Fold the VGIC init stuff back into hardware_enable()?
>>>
>>> None of that works - kvm_arch_hardware_enable() is called once per CPU,
>>> while vgic_hyp_init() can only be called once. Also,
>>> kvm_arch_hardware_enable() is called from interrupt context, and I
>>> wouldn't feel comfortable starting probing DT and allocating stuff from
>>> there.
>>>
>>>> - Read the VGIC number of LRs from the hyp stub?
>>>
>>> That's may UNDEF if called in the wrong context. Also, this defeats the
>>> point of stubs, which is just to install the vectors for the hypervisor.
>>>
>>>> - ..
>>>
>>> Yeah, quite.
>>>
>>> Geoff, Takahiro?
>>>
>>>     M.
>>>
>>
AKASHI Takahiro Dec. 15, 2015, 8:48 a.m. UTC | #10
Will,

On 12/12/2015 01:31 AM, Will Deacon wrote:
> On Thu, Dec 10, 2015 at 10:31:48AM -0800, Geoff Levand wrote:
>> On Thu, 2015-12-03 at 13:58 +0000, Marc Zyngier wrote:
>>> Indeed, this is completely broken (I just reproduced the issue on a
>>> model).
>>
>> There are users out there that want kexec/kdump support.  I think we
>> should remove this patch from the series, replace it with a temporary
>> Kconfig conditional that allows only one of KVM or kexec to be enabled,
>> and merge kexec and kdump support in for 4.5.  This will satisfy users
>> who need kexec but not KVM, like kexec based bootloaders.  Once this
>> KVM hot plug patch is fixed we then merge it, either for 4.5 or 4.6,
>> depending on the timing.
>>
>> I'll prepare and post a v13 series that does the above.
>
> I'm not really keen on merging this based on a "temporary" Kconfig change
> with a promise to have it resolved in the future. Given how long this
> series has been floating around already, I'm not filled with confidence
> at the prospect of waiting for a fixup patch after it's been merged.
>
> Please address the outstanding review comments before reposting.

I'm working hard on addressing Marc's comment on kvm cpu hotplug, and so
can you please review other *main* part of kexec/kdump patches?
Or can I think that 'no comments' is a good sign of your acceptance?

Thanks,
-Takahiro AKASHI


> Will
>
diff mbox

Patch

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6692982..9242765 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -214,6 +214,15 @@  static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * TODO
+	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+	 */
+}
+
 static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	return 0;
@@ -226,7 +235,6 @@  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 405aa18..dc6fadf 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,7 @@  void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index eab83b2..a5d9d74 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,7 +16,6 @@ 
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
@@ -61,6 +60,8 @@  static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -85,11 +86,6 @@  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void)
-{
-	return 0;
-}
-
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -959,7 +955,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }
 
-static void cpu_init_hyp_mode(void *dummy)
+int kvm_arch_hardware_enable(void)
 {
 	phys_addr_t boot_pgd_ptr;
 	phys_addr_t pgd_ptr;
@@ -967,6 +963,9 @@  static void cpu_init_hyp_mode(void *dummy)
 	unsigned long stack_page;
 	unsigned long vector_ptr;
 
+	if (__hyp_get_vectors() != hyp_default_vectors)
+		return 0;
+
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
@@ -979,38 +978,50 @@  static void cpu_init_hyp_mode(void *dummy)
 	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
 
 	kvm_arm_init_debug();
+
+	return 0;
 }
 
-static int hyp_init_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *cpu)
+void kvm_arch_hardware_disable(void)
 {
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		if (__hyp_get_vectors() == hyp_default_vectors)
-			cpu_init_hyp_mode(NULL);
-		break;
-	}
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t phys_idmap_start;
 
-	return NOTIFY_OK;
-}
+	if (__hyp_get_vectors() == hyp_default_vectors)
+		return;
 
-static struct notifier_block hyp_init_cpu_nb = {
-	.notifier_call = hyp_init_cpu_notify,
-};
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	phys_idmap_start = kvm_get_idmap_start();
+
+	__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
+}
 
 #ifdef CONFIG_CPU_PM
 static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT &&
-	    __hyp_get_vectors() == hyp_default_vectors) {
-		cpu_init_hyp_mode(NULL);
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (__hyp_get_vectors() != hyp_default_vectors)
+			__this_cpu_write(kvm_arm_hardware_enabled, 1);
+		else
+			__this_cpu_write(kvm_arm_hardware_enabled, 0);
+		/*
+		 * don't call kvm_arch_hardware_disable() in case of
+		 * CPU_PM_ENTER because it does't actually save any state.
+		 */
+
+		return NOTIFY_OK;
+	case CPU_PM_EXIT:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			kvm_arch_hardware_enable();
+
 		return NOTIFY_OK;
-	}
 
-	return NOTIFY_DONE;
+	default:
+		return NOTIFY_DONE;
+	}
 }
 
 static struct notifier_block hyp_init_cpu_pm_nb = {
@@ -1108,11 +1119,6 @@  static int init_hyp_mode(void)
 	}
 
 	/*
-	 * Execute the init code on each CPU.
-	 */
-	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
-
-	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
@@ -1186,26 +1192,15 @@  int kvm_arch_init(void *opaque)
 		}
 	}
 
-	cpu_notifier_register_begin();
-
 	err = init_hyp_mode();
 	if (err)
 		goto out_err;
 
-	err = __register_cpu_notifier(&hyp_init_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
-		goto out_err;
-	}
-
-	cpu_notifier_register_done();
-
 	hyp_cpu_pm_init();
 
 	kvm_coproc_table_init();
 	return 0;
 out_err:
-	cpu_notifier_register_done();
 	return err;
 }
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6984342..69b4a33 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1644,6 +1644,11 @@  phys_addr_t kvm_get_idmap_vector(void)
 	return hyp_idmap_vector;
 }
 
+phys_addr_t kvm_get_idmap_start(void)
+{
+	return hyp_idmap_start;
+}
+
 int kvm_mmu_init(void)
 {
 	int err;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce72..0b540f8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -223,6 +223,7 @@  struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void kvm_call_reset(phys_addr_t boot_pgd_ptr, phys_addr_t phys_idmap_start);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -247,7 +248,20 @@  static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
-static inline void kvm_arch_hardware_disable(void) {}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * Call reset code, and switch back to stub hyp vectors.
+	 */
+	kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+}
+
+struct vgic_sr_vectors {
+	void	*save_vgic;
+	void	*restore_vgic;
+};
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6150567..ff5a087 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -98,6 +98,7 @@  void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 3070096..bca79f9 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -58,9 +58,18 @@ 
 
 #define HVC_CALL_FUNC 3
 
+/*
+ * HVC_RESET_CPU - Reset cpu in EL2 to initial state.
+ *
+ * @x0: entry address in trampoline code in va
+ * @x1: identical mapping page table in pa
+ */
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+#define HVC_RESET_CPU 4
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 2e67a48..1925163 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -139,6 +139,39 @@  merged:
 	eret
 ENDPROC(__kvm_hyp_init)
 
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP phys_idmap_start
+	 */
+ENTRY(__kvm_hyp_reset)
+	/* We're in trampoline code in VA, switch back to boot page tables */
+	msr	ttbr0_el2, x0
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Branch into PA space */
+	adr	x0, 1f
+	bfi	x1, x0, #0, #PAGE_SHIFT
+	br	x1
+
+	/* We're now in idmap, disable MMU */
+1:	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_EL2_FLAGS
+	bic	x0, x0, x1		// Clear SCTL_M and etc
+	msr	sctlr_el2, x0
+	isb
+
+	/* Install stub vectors */
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
+	msr	vbar_el2, x0
+
+	eret
+ENDPROC(__kvm_hyp_reset)
+
 	.ltorg
 
 	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1bef8db..aca11d6 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -939,6 +939,11 @@  ENTRY(kvm_call_hyp)
 	ret
 ENDPROC(kvm_call_hyp)
 
+ENTRY(kvm_call_reset)
+	hvc	#HVC_RESET_CPU
+	ret
+ENDPROC(kvm_call_reset)
+
 .macro invalid_vector	label, target
 	.align	2
 \label:
@@ -982,10 +987,27 @@  el1_sync:					// Guest trapped into EL2
 	cmp	x18, #HVC_GET_VECTORS
 	b.ne	1f
 	mrs	x0, vbar_el2
-	b	2f
-
-1:	/* Default to HVC_CALL_HYP. */
+	b	do_eret
 
+	/* jump into trampoline code */
+1:	cmp	x18, #HVC_RESET_CPU
+	b.ne	2f
+	/*
+	 * Entry point is:
+	 *	TRAMPOLINE_VA
+	 *	+ (__kvm_hyp_reset - (__hyp_idmap_text_start & PAGE_MASK))
+	 */
+	adrp	x2, __kvm_hyp_reset
+	add	x2, x2, #:lo12:__kvm_hyp_reset
+	adrp	x3, __hyp_idmap_text_start
+	add	x3, x3, #:lo12:__hyp_idmap_text_start
+	and	x3, x3, PAGE_MASK
+	sub	x2, x2, x3
+	ldr	x3, =TRAMPOLINE_VA
+	add	x2, x2, x3
+	br	x2				// no return
+
+2:	/* Default to HVC_CALL_HYP. */
 	push	lr, xzr
 
 	/*
@@ -999,7 +1021,9 @@  el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-2:	eret
+
+do_eret:
+	eret
 
 el1_trap:
 	/*