diff mbox

[2/2] ARM: soft-reboot into same mode that we entered the kernel

Message ID E1cH751-0003kk-Ts@rmk-PC.armlinux.org.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Russell King (Oracle) Dec. 14, 2016, 10:46 a.m. UTC
When we soft-reboot (eg, kexec) from one kernel into the next, we need
to ensure that we enter the new kernel in the same processor mode as
when we were entered, so that (eg) the new kernel can install its own
hypervisor - the old kernel's hypervisor will have been overwritten.

In order to do this, we need to pass a flag to cpu_reset() so it knows
what to do, and we need to modify the kernel's own hypervisor stub to
allow it to handle a soft-reboot.

As we are always guaranteed to install our own hypervisor if we're
entered in HYP32 mode, and KVM will have moved itself out of the way
on kexec/normal reboot, we can assume that our hypervisor is in place
when we want to kexec, so changing our hypervisor API should not be a
problem.

Tested-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/proc-fns.h |  4 ++--
 arch/arm/kernel/hyp-stub.S      | 13 +++++++++++++
 arch/arm/kernel/reboot.c        |  7 +++++--
 arch/arm/mm/proc-v7.S           | 12 ++++++++----
 4 files changed, 28 insertions(+), 8 deletions(-)

Comments

Mark Rutland Dec. 14, 2016, 11:56 a.m. UTC | #1
On Wed, Dec 14, 2016 at 10:46:35AM +0000, Russell King wrote:
> When we soft-reboot (eg, kexec) from one kernel into the next, we need
> to ensure that we enter the new kernel in the same processor mode as
> when we were entered, so that (eg) the new kernel can install its own
> hypervisor - the old kernel's hypervisor will have been overwritten.
> 
> In order to do this, we need to pass a flag to cpu_reset() so it knows
> what to do, and we need to modify the kernel's own hypervisor stub to
> allow it to handle a soft-reboot.
> 
> As we are always guaranteed to install our own hypervisor if we're
> entered in HYP32 mode, and KVM will have moved itself out of the way
> on kexec/normal reboot, we can assume that our hypervisor is in place
> when we want to kexec, so changing our hypervisor API should not be a
> problem.

Just to check, does that also hold true for kdump?

I haven't gone digging yet, but it looks like KVM might still be
installed, rather than the hyp stub, and we might need some logic to
ensure that it's torn down...
 
[...]

> @@ -51,7 +52,9 @@ static void __soft_restart(void *addr)
>  
>  	/* Switch to the identity mapping. */
>  	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
> -	phys_reset((unsigned long)addr);
> +
> +	/* original stub should be restored by kvm */
> +	phys_reset((unsigned long)addr, is_hyp_mode_available());

... otherwise here we'd call into the KVM hyp code in a potentially
confusing manner.

Otherwise, this looks fine to me.

Thanks,
Mark.
Russell King (Oracle) Dec. 14, 2016, 12:05 p.m. UTC | #2
On Wed, Dec 14, 2016 at 11:56:49AM +0000, Mark Rutland wrote:
> On Wed, Dec 14, 2016 at 10:46:35AM +0000, Russell King wrote:
> > When we soft-reboot (eg, kexec) from one kernel into the next, we need
> > to ensure that we enter the new kernel in the same processor mode as
> > when we were entered, so that (eg) the new kernel can install its own
> > hypervisor - the old kernel's hypervisor will have been overwritten.
> > 
> > In order to do this, we need to pass a flag to cpu_reset() so it knows
> > what to do, and we need to modify the kernel's own hypervisor stub to
> > allow it to handle a soft-reboot.
> > 
> > As we are always guaranteed to install our own hypervisor if we're
> > entered in HYP32 mode, and KVM will have moved itself out of the way
> > on kexec/normal reboot, we can assume that our hypervisor is in place
> > when we want to kexec, so changing our hypervisor API should not be a
> > problem.
> 
> Just to check, does that also hold true for kdump?
> 
> I haven't gone digging yet, but it looks like KVM might still be
> installed, rather than the hyp stub, and we might need some logic to
> ensure that it's torn down...

The same problem will be true of ARM64 - I don't see any solution to
that in the present state.

> > @@ -51,7 +52,9 @@ static void __soft_restart(void *addr)
> >  
> >  	/* Switch to the identity mapping. */
> >  	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
> > -	phys_reset((unsigned long)addr);
> > +
> > +	/* original stub should be restored by kvm */
> > +	phys_reset((unsigned long)addr, is_hyp_mode_available());
> 
> ... otherwise here we'd call into the KVM hyp code in a potentially
> confusing manner.
> 
> Otherwise, this looks fine to me.

The only thing that I can think which would resolve that would be to
lay down a standard API for the hyp code, so things like reboot into
hyp mode can work irrespective of the hyp stub in place.

The issue here is that a panic can happen at any time from any context
with any hyp stub in place, so there _needs_ to be a uniform way to do
this.  It's very bad that we've got this far without this point having
been considered - all we can do right now is to try and fix the issues
as they come up.

Right now, let's fix this so we get some kind of improvement, and later
try to sort out some kind of uniform interface for this task.
Mark Rutland Dec. 14, 2016, 12:17 p.m. UTC | #3
On Wed, Dec 14, 2016 at 12:05:41PM +0000, Russell King - ARM Linux wrote:
> On Wed, Dec 14, 2016 at 11:56:49AM +0000, Mark Rutland wrote:
> > On Wed, Dec 14, 2016 at 10:46:35AM +0000, Russell King wrote:
> > > When we soft-reboot (eg, kexec) from one kernel into the next, we need
> > > to ensure that we enter the new kernel in the same processor mode as
> > > when we were entered, so that (eg) the new kernel can install its own
> > > hypervisor - the old kernel's hypervisor will have been overwritten.
> > > 
> > > In order to do this, we need to pass a flag to cpu_reset() so it knows
> > > what to do, and we need to modify the kernel's own hypervisor stub to
> > > allow it to handle a soft-reboot.
> > > 
> > > As we are always guaranteed to install our own hypervisor if we're
> > > entered in HYP32 mode, and KVM will have moved itself out of the way
> > > on kexec/normal reboot, we can assume that our hypervisor is in place
> > > when we want to kexec, so changing our hypervisor API should not be a
> > > problem.
> > 
> > Just to check, does that also hold true for kdump?
> > 
> > I haven't gone digging yet, but it looks like KVM might still be
> > installed, rather than the hyp stub, and we might need some logic to
> > ensure that it's torn down...
> 
> The same problem will be true of ARM64 - I don't see any solution to
> that in the present state.

Sure. We don't have kdump suppoort yet, and I intend for that to be
addressed before kdump support is merged.

> > > @@ -51,7 +52,9 @@ static void __soft_restart(void *addr)
> > >  
> > >  	/* Switch to the identity mapping. */
> > >  	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
> > > -	phys_reset((unsigned long)addr);
> > > +
> > > +	/* original stub should be restored by kvm */
> > > +	phys_reset((unsigned long)addr, is_hyp_mode_available());
> > 
> > ... otherwise here we'd call into the KVM hyp code in a potentially
> > confusing manner.
> > 
> > Otherwise, this looks fine to me.
> 
> The only thing that I can think which would resolve that would be to
> lay down a standard API for the hyp code, so things like reboot into
> hyp mode can work irrespective of the hyp stub in place.

Sure; having the KVM hyp code also implement HVC_SOFT_RESTART seems
sensible. This would also work for arm64.

> The issue here is that a panic can happen at any time from any context
> with any hyp stub in place, so there _needs_ to be a uniform way to do
> this.  It's very bad that we've got this far without this point having
> been considered - all we can do right now is to try and fix the issues
> as they come up.
> 
> Right now, let's fix this so we get some kind of improvement, and later
> try to sort out some kind of uniform interface for this task.

Sure, that's a bigger task, and this is definitely a step in the right
direction.

We need to avoid the kdump regression somehow though; can we somehow
detect if KVM is active and avoid issuing the HVC_SOFT_RESTART?

Thanks,
Mark.
Russell King (Oracle) Dec. 14, 2016, 12:29 p.m. UTC | #4
On Wed, Dec 14, 2016 at 12:17:47PM +0000, Mark Rutland wrote:
> On Wed, Dec 14, 2016 at 12:05:41PM +0000, Russell King - ARM Linux wrote:
> > The issue here is that a panic can happen at any time from any context
> > with any hyp stub in place, so there _needs_ to be a uniform way to do
> > this.  It's very bad that we've got this far without this point having
> > been considered - all we can do right now is to try and fix the issues
> > as they come up.
> > 
> > Right now, let's fix this so we get some kind of improvement, and later
> > try to sort out some kind of uniform interface for this task.
> 
> Sure, that's a bigger task, and this is definitely a step in the right
> direction.
> 
> We need to avoid the kdump regression somehow though; can we somehow
> detect if KVM is active and avoid issuing the HVC_SOFT_RESTART?

That's a question for KVM people.

However, there's a bigger question, which is: what do we want to happen
in the case of kdump - do we want to be entering the kdump kernel in
HYP or SVC mode?  As the kdump kernel exists to be able to save the
state of a crashing system, the kdump kernel should do that and then
restart the system in a clean way (iow, not via yet another kexec.)

So maybe the right answer is that kdump should always invoke the kernel
in SVC mode.
Mark Rutland Dec. 14, 2016, 12:40 p.m. UTC | #5
On Wed, Dec 14, 2016 at 12:29:45PM +0000, Russell King - ARM Linux wrote:
> On Wed, Dec 14, 2016 at 12:17:47PM +0000, Mark Rutland wrote:
> > On Wed, Dec 14, 2016 at 12:05:41PM +0000, Russell King - ARM Linux wrote:
> > > The issue here is that a panic can happen at any time from any context
> > > with any hyp stub in place, so there _needs_ to be a uniform way to do
> > > this.  It's very bad that we've got this far without this point having
> > > been considered - all we can do right now is to try and fix the issues
> > > as they come up.
> > > 
> > > Right now, let's fix this so we get some kind of improvement, and later
> > > try to sort out some kind of uniform interface for this task.
> > 
> > Sure, that's a bigger task, and this is definitely a step in the right
> > direction.
> > 
> > We need to avoid the kdump regression somehow though; can we somehow
> > detect if KVM is active and avoid issuing the HVC_SOFT_RESTART?
> 
> That's a question for KVM people.
> 
> However, there's a bigger question, which is: what do we want to happen
> in the case of kdump - do we want to be entering the kdump kernel in
> HYP or SVC mode?  As the kdump kernel exists to be able to save the
> state of a crashing system, the kdump kernel should do that and then
> restart the system in a clean way (iow, not via yet another kexec.)
> 
> So maybe the right answer is that kdump should always invoke the kernel
> in SVC mode.

Personally, my view is the opposite -- we should always exit the way we
came, and kdump can go from there. Otherwise we're leaving context
active in hyp mode that might hinder kdump (or would otherwise be useful
for kdump to be able to access).

For example, we might want to do something like kernel guard, where even
the host kernel would have a stage-2 translation active in hyp that we'd
need to tear down. That, or the hyp page table might have been
corrupted, and tearing down ASAP might save us from asynchrnous issues
from page table walks. It's all a trade-off, either way -- the hyp code
could also be corrupt.

Certainly I would expect that once kdump's logged what it can, it should
trigger a real reboot.

Thanks,
Mark.

[1] https://01.org/intel-kgt
Russell King (Oracle) Dec. 14, 2016, 12:46 p.m. UTC | #6
On Wed, Dec 14, 2016 at 12:40:56PM +0000, Mark Rutland wrote:
> On Wed, Dec 14, 2016 at 12:29:45PM +0000, Russell King - ARM Linux wrote:
> > On Wed, Dec 14, 2016 at 12:17:47PM +0000, Mark Rutland wrote:
> > > On Wed, Dec 14, 2016 at 12:05:41PM +0000, Russell King - ARM Linux wrote:
> > > > The issue here is that a panic can happen at any time from any context
> > > > with any hyp stub in place, so there _needs_ to be a uniform way to do
> > > > this.  It's very bad that we've got this far without this point having
> > > > been considered - all we can do right now is to try and fix the issues
> > > > as they come up.
> > > > 
> > > > Right now, let's fix this so we get some kind of improvement, and later
> > > > try to sort out some kind of uniform interface for this task.
> > > 
> > > Sure, that's a bigger task, and this is definitely a step in the right
> > > direction.
> > > 
> > > We need to avoid the kdump regression somehow though; can we somehow
> > > detect if KVM is active and avoid issuing the HVC_SOFT_RESTART?
> > 
> > That's a question for KVM people.
> > 
> > However, there's a bigger question, which is: what do we want to happen
> > in the case of kdump - do we want to be entering the kdump kernel in
> > HYP or SVC mode?  As the kdump kernel exists to be able to save the
> > state of a crashing system, the kdump kernel should do that and then
> > restart the system in a clean way (iow, not via yet another kexec.)
> > 
> > So maybe the right answer is that kdump should always invoke the kernel
> > in SVC mode.
> 
> Personally, my view is the opposite -- we should always exit the way we
> came, and kdump can go from there. Otherwise we're leaving context
> active in hyp mode that might hinder kdump (or would otherwise be useful
> for kdump to be able to access).
> 
> For example, we might want to do something like kernel guard, where even
> the host kernel would have a stage-2 translation active in hyp that we'd
> need to tear down. That, or the hyp page table might have been
> corrupted, and tearing down ASAP might save us from asynchrnous issues
> from page table walks. It's all a trade-off, either way -- the hyp code
> could also be corrupt.

However, the issue there is that if there is a hyp page table present,
it means that we're no longer saving out a true image of the running
system - the core dump expects to be an image as seen from previously
running kernel, which would be in SVC mode.

If we save out the view of memory from HYP mode, we're no longer saving
out the same thing - and the physical addresses which are stored within
kdump for things like the CPU states (which are physical addresses as
seen in SVC mode) are no longer valid.

So, to make kdump work in this situation probably needs a significant
rework of kdump's idea of what a physical address is.

In the mean time, I think the approach I've put forward is the only
sensible one until we can be sure that a HYP mode core-dump really does
make sense.
Marc Zyngier Dec. 14, 2016, 1:42 p.m. UTC | #7
On 14/12/16 12:05, Russell King - ARM Linux wrote:
> On Wed, Dec 14, 2016 at 11:56:49AM +0000, Mark Rutland wrote:
>> On Wed, Dec 14, 2016 at 10:46:35AM +0000, Russell King wrote:
>>> When we soft-reboot (eg, kexec) from one kernel into the next, we need
>>> to ensure that we enter the new kernel in the same processor mode as
>>> when we were entered, so that (eg) the new kernel can install its own
>>> hypervisor - the old kernel's hypervisor will have been overwritten.
>>>
>>> In order to do this, we need to pass a flag to cpu_reset() so it knows
>>> what to do, and we need to modify the kernel's own hypervisor stub to
>>> allow it to handle a soft-reboot.
>>>
>>> As we are always guaranteed to install our own hypervisor if we're
>>> entered in HYP32 mode, and KVM will have moved itself out of the way
>>> on kexec/normal reboot, we can assume that our hypervisor is in place
>>> when we want to kexec, so changing our hypervisor API should not be a
>>> problem.
>>
>> Just to check, does that also hold true for kdump?
>>
>> I haven't gone digging yet, but it looks like KVM might still be
>> installed, rather than the hyp stub, and we might need some logic to
>> ensure that it's torn down...
> 
> The same problem will be true of ARM64 - I don't see any solution to
> that in the present state.
> 
>>> @@ -51,7 +52,9 @@ static void __soft_restart(void *addr)
>>>  
>>>  	/* Switch to the identity mapping. */
>>>  	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
>>> -	phys_reset((unsigned long)addr);
>>> +
>>> +	/* original stub should be restored by kvm */
>>> +	phys_reset((unsigned long)addr, is_hyp_mode_available());
>>
>> ... otherwise here we'd call into the KVM hyp code in a potentially
>> confusing manner.
>>
>> Otherwise, this looks fine to me.
> 
> The only thing that I can think which would resolve that would be to
> lay down a standard API for the hyp code, so things like reboot into
> hyp mode can work irrespective of the hyp stub in place.

That looks like a sensible thing to do. I'll look into it.

Thanks,

	M.
diff mbox

Patch

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 8877ad5ffe10..f2e1af45bd6f 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -43,7 +43,7 @@  extern struct processor {
 	/*
 	 * Special stuff for a reset
 	 */
-	void (*reset)(unsigned long addr) __attribute__((noreturn));
+	void (*reset)(unsigned long addr, bool hvc) __attribute__((noreturn));
 	/*
 	 * Idle the processor
 	 */
@@ -88,7 +88,7 @@  extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte);
 #else
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 #endif
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn));
 
 /* These three are private to arch/arm/kernel/suspend.c */
 extern void cpu_do_suspend(void *);
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index f3e9ba5fb642..82915231c6f8 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -24,6 +24,7 @@ 
 
 #define HVC_GET_VECTORS 0
 #define HVC_SET_VECTORS 1
+#define HVC_SOFT_RESTART 2
 
 #ifndef ZIMAGE
 /*
@@ -215,6 +216,10 @@  ENDPROC(__hyp_stub_install_secondary)
 	mcr	p15, 4, r1, c12, c0, 0	@ set HVBAR
 	b	__hyp_stub_exit
 
+1:	teq	r0, #HVC_SOFT_RESTART
+	bne	1f
+	bx	r3
+
 1:	mov	r0, #-1
 
 __hyp_stub_exit:
@@ -256,6 +261,14 @@  ENTRY(__hyp_set_vectors)
 	ret	lr
 ENDPROC(__hyp_set_vectors)
 
+ENTRY(__hyp_soft_restart)
+	mov	r3, r0
+	mov	r0, #HVC_SOFT_RESTART
+	__HVC(0)
+	mov	r0, r3
+	ret	lr
+ENDPROC(__hyp_soft_restart)
+
 #ifndef ZIMAGE
 .align 2
 .L__boot_cpu_mode_offset:
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
index 3fa867a2aae6..3b2aa9a9fe26 100644
--- a/arch/arm/kernel/reboot.c
+++ b/arch/arm/kernel/reboot.c
@@ -12,10 +12,11 @@ 
 
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
+#include <asm/virt.h>
 
 #include "reboot.h"
 
-typedef void (*phys_reset_t)(unsigned long);
+typedef void (*phys_reset_t)(unsigned long, bool);
 
 /*
  * Function pointers to optional machine specific functions
@@ -51,7 +52,9 @@  static void __soft_restart(void *addr)
 
 	/* Switch to the identity mapping. */
 	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
-	phys_reset((unsigned long)addr);
+
+	/* original stub should be restored by kvm */
+	phys_reset((unsigned long)addr, is_hyp_mode_available());
 
 	/* Should never get here. */
 	BUG();
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index d00d52c9de3e..1846ca4255d0 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -53,11 +53,15 @@  ENDPROC(cpu_v7_proc_fin)
 	.align	5
 	.pushsection	.idmap.text, "ax"
 ENTRY(cpu_v7_reset)
-	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
-	bic	r1, r1, #0x1			@ ...............m
- THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
-	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
+	mrc	p15, 0, r2, c1, c0, 0		@ ctrl register
+	bic	r2, r2, #0x1			@ ...............m
+ THUMB(	bic	r2, r2, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
+	mcr	p15, 0, r2, c1, c0, 0		@ disable MMU
 	isb
+#ifdef CONFIG_ARM_VIRT_EXT
+	teq	r1, #0
+	bne	__hyp_soft_restart
+#endif
 	bx	r0
 ENDPROC(cpu_v7_reset)
 	.popsection