
[01/37] KVM: arm64: Avoid storing the vcpu pointer on the stack

Message ID 20171012104141.26902-2-christoffer.dall@linaro.org (mailing list archive)
State New, archived

Commit Message

Christoffer Dall Oct. 12, 2017, 10:41 a.m. UTC
We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This requires us to have a scratch register though, so we take the
chance to rearrange some of the el1_sync code to only look at the
vttbr_el2 to determine if this is a trap from the guest or an HVC from
the host.  We do add an extra check to call the panic code if the kernel
is configured with debugging enabled and we saw a trap from the host
which wasn't an HVC, indicating that we left some EL2 trap configured by
mistake.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
 arch/arm64/kernel/asm-offsets.c  |  1 +
 arch/arm64/kvm/hyp/entry.S       |  5 +----
 arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
 arch/arm64/kvm/hyp/switch.c      |  2 +-
 5 files changed, 41 insertions(+), 26 deletions(-)
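
For reference, what the two new macros compute is, in rough C terms (a
sketch of the same steps, not code from this patch):

	struct kvm_cpu_context *host_ctxt;
	struct kvm_vcpu *vcpu;

	/* get_host_ctxt: per-cpu host context, offset taken from tpidr_el2 */
	host_ctxt = this_cpu_ptr(&kvm_host_cpu_state);
	host_ctxt = kern_hyp_va(host_ctxt);

	/* get_vcpu: vcpu pointer stashed in the host context at entry */
	vcpu = kern_hyp_va(host_ctxt->__hyp_running_vcpu);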

Comments

Marc Zyngier Oct. 12, 2017, 3:49 p.m. UTC | #1
On 12/10/17 11:41, Christoffer Dall wrote:
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This requires us to have a scratch register though, so we take the
> chance to rearrange some of the el1_sync code to only look at the
> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> the host.  We do add an extra check to call the panic code if the kernel
> is configured with debugging enabled and we saw a trap from the host
> which wasn't an HVC, indicating that we left some EL2 trap configured by
> mistake.
> 
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
>  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
>  arch/arm64/kernel/asm-offsets.c  |  1 +
>  arch/arm64/kvm/hyp/entry.S       |  5 +----
>  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
>  arch/arm64/kvm/hyp/switch.c      |  2 +-
>  5 files changed, 41 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index ab4d0a9..7e48a39 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
>  
>  #endif
>  
> +#ifdef __ASSEMBLY__
> +.macro get_host_ctxt reg, tmp
> +	/*
> +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> +	 * not be accessible by this address from EL2, hyp_panic() converts
> +	 * it with kern_hyp_va() before use.
> +	 */

This really looks like a stale comment, as there is no hyp_panic
involved here anymore (thankfully!).

> +	ldr	\reg, =kvm_host_cpu_state
> +	mrs	\tmp, tpidr_el2
> +	add	\reg, \reg, \tmp
> +	kern_hyp_va \reg

Here, we're trading a load from the stack for a load from the constant
pool. Can't we do something like:

	adr_l	\reg, kvm_host_cpu_state
	msr	\tmp, tpidr_el2
	add	\reg, \reg, \tmp

and that's it? This relies on the property that the kernel/hyp offset is
constant, and that it doesn't matter if we add the offset to a kernel VA
or a HYP VA... Completely untested of course!

> +.endm
> +
> +.macro get_vcpu vcpu, ctxt
> +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> +	kern_hyp_va	\vcpu
> +.endm
> +
> +#endif
> +
>  #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 71bf088..612021d 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -135,6 +135,7 @@ int main(void)
>    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
>    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
>    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
>  #endif
>  #ifdef CONFIG_CPU_PM
>    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index 9a8ab5d..76cd48f 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
>  	// Store the host regs
>  	save_callee_saved_regs x1
>  
> -	// Store host_ctxt and vcpu for use at exit time
> -	stp	x1, x0, [sp, #-16]!
> -
>  	add	x18, x0, #VCPU_CONTEXT
>  
>  	// Restore guest regs x0-x17
> @@ -119,7 +116,7 @@ ENTRY(__guest_exit)
>  	save_callee_saved_regs x1
>  
>  	// Restore the host_ctxt from the stack

Stale comment again.

> -	ldr	x2, [sp], #16
> +	get_host_ctxt	x2, x3
>  
>  	// Now restore the host regs
>  	restore_callee_saved_regs x2
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index e4f37b9..2950f26 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -56,19 +56,16 @@ ENDPROC(__vhe_hyp_call)
>  el1_sync:				// Guest trapped into EL2
>  	stp	x0, x1, [sp, #-16]!
>  
> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> -	mrs	x1, esr_el2
> -alternative_else
> -	mrs	x1, esr_el1
> -alternative_endif
> -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> -
> -	cmp	x0, #ESR_ELx_EC_HVC64
> -	b.ne	el1_trap
> -
>  	mrs	x1, vttbr_el2		// If vttbr is valid, the 64bit guest
>  	cbnz	x1, el1_trap		// called HVC

Comment is outdated. Any guest trap will take this path.

>  
> +#ifdef CONFIG_DEBUG
> +	mrs	x0, esr_el2
> +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> +	cmp     x0, #ESR_ELx_EC_HVC64
> +	b.ne    __hyp_panic
> +#endif
> +
>  	/* Here, we're pretty sure the host called HVC. */
>  	ldp	x0, x1, [sp], #16
>  
> @@ -101,10 +98,15 @@ alternative_endif
>  	eret
>  
>  el1_trap:
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
> +
> +	mrs		x0, esr_el2
> +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
>  	/*
>  	 * x0: ESR_EC
> +	 * x1: vcpu pointer
>  	 */
> -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
>  
>  	/*
>  	 * We trap the first access to the FP/SIMD to save the host context
> @@ -122,13 +124,15 @@ alternative_else_nop_endif
>  
>  el1_irq:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
>  	mov	x0, #ARM_EXCEPTION_IRQ
>  	b	__guest_exit
>  
>  el1_error:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
>  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
>  	b	__guest_exit
>  
> @@ -164,14 +168,7 @@ ENTRY(__hyp_do_panic)
>  ENDPROC(__hyp_do_panic)
>  
>  ENTRY(__hyp_panic)
> -	/*
> -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> -	 * not be accessible by this address from EL2, hyp_panic() converts
> -	 * it with kern_hyp_va() before use.
> -	 */
> -	ldr	x0, =kvm_host_cpu_state
> -	mrs	x1, tpidr_el2
> -	add	x0, x0, x1
> +	get_host_ctxt x0, x1
>  	b	hyp_panic
>  ENDPROC(__hyp_panic)
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 69ef24a..a0123ad 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -435,7 +435,7 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
>  	if (read_sysreg(vttbr_el2)) {
>  		struct kvm_cpu_context *host_ctxt;
>  
> -		host_ctxt = kern_hyp_va(__host_ctxt);
> +		host_ctxt = __host_ctxt;

Can't we just rename __host_ctxt to host_ctxt and drop the local definition?

>  		vcpu = host_ctxt->__hyp_running_vcpu;
>  		__timer_disable_traps(vcpu);
>  		__deactivate_traps(vcpu);
> 

Thanks,

	M.
Christoffer Dall Oct. 12, 2017, 5:02 p.m. UTC | #2
On Thu, Oct 12, 2017 at 04:49:44PM +0100, Marc Zyngier wrote:
> On 12/10/17 11:41, Christoffer Dall wrote:
> > We already have the percpu area for the host cpu state, which points to
> > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > every context switch.  We can be a little more clever and just use
> > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > context.
> > 
> > This requires us to have a scratch register though, so we take the
> > chance to rearrange some of the el1_sync code to only look at the
> > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > the host.  We do add an extra check to call the panic code if the kernel
> > is configured with debugging enabled and we saw a trap from the host
> > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > mistake.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> >  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
> >  arch/arm64/kernel/asm-offsets.c  |  1 +
> >  arch/arm64/kvm/hyp/entry.S       |  5 +----
> >  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
> >  arch/arm64/kvm/hyp/switch.c      |  2 +-
> >  5 files changed, 41 insertions(+), 26 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index ab4d0a9..7e48a39 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
> >  
> >  #endif
> >  
> > +#ifdef __ASSEMBLY__
> > +.macro get_host_ctxt reg, tmp
> > +	/*
> > +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > +	 * not be accessible by this address from EL2, hyp_panic() converts
> > +	 * it with kern_hyp_va() before use.
> > +	 */
> 
> This really looks like a stale comment, as there is no hyp_panic
> involved here anymore (thankfully!).
> 

yeah, I suppose.

> > +	ldr	\reg, =kvm_host_cpu_state
> > +	mrs	\tmp, tpidr_el2
> > +	add	\reg, \reg, \tmp
> > +	kern_hyp_va \reg
> 
> Here, we're trading a load from the stack for a load from the constant
> pool. Can't we do something like:
> 
> 	adr_l	\reg, kvm_host_cpu_state
> 	msr	\tmp, tpidr_el2
> 	add	\reg, \reg, \tmp
> 
> and that's it? 

That's definitely what the compiler generates from C code...

> This relies on the property that the kernel/hyp offset is
> constant, and that it doesn't matter if we add the offset to a kernel VA
> or a HYP VA... Completely untested of course!
> 

You're the hyp VA expert.  Is it valid to rely on that assumption?

> > +.endm
> > +
> > +.macro get_vcpu vcpu, ctxt
> > +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> > +	kern_hyp_va	\vcpu
> > +.endm
> > +
> > +#endif
> > +
> >  #endif /* __ARM_KVM_ASM_H__ */
> > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> > index 71bf088..612021d 100644
> > --- a/arch/arm64/kernel/asm-offsets.c
> > +++ b/arch/arm64/kernel/asm-offsets.c
> > @@ -135,6 +135,7 @@ int main(void)
> >    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
> >    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
> >    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> > +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
> >  #endif
> >  #ifdef CONFIG_CPU_PM
> >    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > index 9a8ab5d..76cd48f 100644
> > --- a/arch/arm64/kvm/hyp/entry.S
> > +++ b/arch/arm64/kvm/hyp/entry.S
> > @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
> >  	// Store the host regs
> >  	save_callee_saved_regs x1
> >  
> > -	// Store host_ctxt and vcpu for use at exit time
> > -	stp	x1, x0, [sp, #-16]!
> > -
> >  	add	x18, x0, #VCPU_CONTEXT
> >  
> >  	// Restore guest regs x0-x17
> > @@ -119,7 +116,7 @@ ENTRY(__guest_exit)
> >  	save_callee_saved_regs x1
> >  
> >  	// Restore the host_ctxt from the stack
> 
> Stale comment again.
> 

yeah...

> > -	ldr	x2, [sp], #16
> > +	get_host_ctxt	x2, x3
> >  
> >  	// Now restore the host regs
> >  	restore_callee_saved_regs x2
> > diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> > index e4f37b9..2950f26 100644
> > --- a/arch/arm64/kvm/hyp/hyp-entry.S
> > +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> > @@ -56,19 +56,16 @@ ENDPROC(__vhe_hyp_call)
> >  el1_sync:				// Guest trapped into EL2
> >  	stp	x0, x1, [sp, #-16]!
> >  
> > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > -	mrs	x1, esr_el2
> > -alternative_else
> > -	mrs	x1, esr_el1
> > -alternative_endif
> > -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> > -
> > -	cmp	x0, #ESR_ELx_EC_HVC64
> > -	b.ne	el1_trap
> > -
> >  	mrs	x1, vttbr_el2		// If vttbr is valid, the 64bit guest
> >  	cbnz	x1, el1_trap		// called HVC
> 
> Comment is outdated. Any guest trap will take this path.
> 

yeah...

> >  
> > +#ifdef CONFIG_DEBUG
> > +	mrs	x0, esr_el2
> > +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> > +	cmp     x0, #ESR_ELx_EC_HVC64
> > +	b.ne    __hyp_panic
> > +#endif
> > +
> >  	/* Here, we're pretty sure the host called HVC. */
> >  	ldp	x0, x1, [sp], #16
> >  
> > @@ -101,10 +98,15 @@ alternative_endif
> >  	eret
> >  
> >  el1_trap:
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> > +
> > +	mrs		x0, esr_el2
> > +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
> >  	/*
> >  	 * x0: ESR_EC
> > +	 * x1: vcpu pointer
> >  	 */
> > -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
> >  
> >  	/*
> >  	 * We trap the first access to the FP/SIMD to save the host context
> > @@ -122,13 +124,15 @@ alternative_else_nop_endif
> >  
> >  el1_irq:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> >  	mov	x0, #ARM_EXCEPTION_IRQ
> >  	b	__guest_exit
> >  
> >  el1_error:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> >  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
> >  	b	__guest_exit
> >  
> > @@ -164,14 +168,7 @@ ENTRY(__hyp_do_panic)
> >  ENDPROC(__hyp_do_panic)
> >  
> >  ENTRY(__hyp_panic)
> > -	/*
> > -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > -	 * not be accessible by this address from EL2, hyp_panic() converts
> > -	 * it with kern_hyp_va() before use.
> > -	 */
> > -	ldr	x0, =kvm_host_cpu_state
> > -	mrs	x1, tpidr_el2
> > -	add	x0, x0, x1
> > +	get_host_ctxt x0, x1
> >  	b	hyp_panic
> >  ENDPROC(__hyp_panic)
> >  
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 69ef24a..a0123ad 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -435,7 +435,7 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> >  	if (read_sysreg(vttbr_el2)) {
> >  		struct kvm_cpu_context *host_ctxt;
> >  
> > -		host_ctxt = kern_hyp_va(__host_ctxt);
> > +		host_ctxt = __host_ctxt;
> 
> Can't we just rename __host_ctxt to host_ctxt and drop the local definition?
> 

yes, patch splitting snafu.  Will fix.

By the way, what I'm going for is: anything in the hyp address space has a
leading __, and otherwise not.

> >  		vcpu = host_ctxt->__hyp_running_vcpu;
> >  		__timer_disable_traps(vcpu);
> >  		__deactivate_traps(vcpu);
> > 
> 
Thanks,
-Christoffer
Marc Zyngier Oct. 13, 2017, 11:31 a.m. UTC | #3
On 12/10/17 18:02, Christoffer Dall wrote:
> On Thu, Oct 12, 2017 at 04:49:44PM +0100, Marc Zyngier wrote:
>> On 12/10/17 11:41, Christoffer Dall wrote:
>>> We already have the percpu area for the host cpu state, which points to
>>> the VCPU, so there's no need to store the VCPU pointer on the stack on
>>> every context switch.  We can be a little more clever and just use
>>> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
>>> context.
>>>
>>> This requires us to have a scratch register though, so we take the
>>> chance to rearrange some of the el1_sync code to only look at the
>>> vttbr_el2 to determine if this is a trap from the guest or an HVC from
>>> the host.  We do add an extra check to call the panic code if the kernel
>>> is configured with debugging enabled and we saw a trap from the host
>>> which wasn't an HVC, indicating that we left some EL2 trap configured by
>>> mistake.
>>>
>>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
>>> ---
>>>  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
>>>  arch/arm64/kernel/asm-offsets.c  |  1 +
>>>  arch/arm64/kvm/hyp/entry.S       |  5 +----
>>>  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
>>>  arch/arm64/kvm/hyp/switch.c      |  2 +-
>>>  5 files changed, 41 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>>> index ab4d0a9..7e48a39 100644
>>> --- a/arch/arm64/include/asm/kvm_asm.h
>>> +++ b/arch/arm64/include/asm/kvm_asm.h
>>> @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
>>>  
>>>  #endif
>>>  
>>> +#ifdef __ASSEMBLY__
>>> +.macro get_host_ctxt reg, tmp
>>> +	/*
>>> +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
>>> +	 * not be accessible by this address from EL2, hyp_panic() converts
>>> +	 * it with kern_hyp_va() before use.
>>> +	 */
>>
>> This really looks like a stale comment, as there is no hyp_panic
>> involved here anymore (thankfully!).
>>
> 
> yeah, I suppose.
> 
>>> +	ldr	\reg, =kvm_host_cpu_state
>>> +	mrs	\tmp, tpidr_el2
>>> +	add	\reg, \reg, \tmp
>>> +	kern_hyp_va \reg
>>
>> Here, we're trading a load from the stack for a load from the constant
>> pool. Can't we do something like:
>>
>> 	adr_l	\reg, kvm_host_cpu_state
>> 	msr	\tmp, tpidr_el2
>> 	add	\reg, \reg, \tmp
>>
>> and that's it? 
> 
> That's definitely what the compiler generates from C code...
> 
>> This relies on the property that the kernel/hyp offset is
>> constant, and that it doesn't matter if we add the offset to a kernel VA
>> or a HYP VA... Completely untested of course!
>>
> 
> You're the hyp VA expert.  Is it valid to rely on that assumption?

Absolutely. Otherwise, we've messed up something really badly.

[...]

>>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>>> index 69ef24a..a0123ad 100644
>>> --- a/arch/arm64/kvm/hyp/switch.c
>>> +++ b/arch/arm64/kvm/hyp/switch.c
>>> @@ -435,7 +435,7 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
>>>  	if (read_sysreg(vttbr_el2)) {
>>>  		struct kvm_cpu_context *host_ctxt;
>>>  
>>> -		host_ctxt = kern_hyp_va(__host_ctxt);
>>> +		host_ctxt = __host_ctxt;
>>
>> Can't we just rename __host_ctxt to host_ctxt and drop the local definition?
>>
> 
> yes, patch splitting snafu.  Will fix.
> 
> By the way, what I'm going for is: anything in the hyp address space has a
> leading __, and otherwise not.

OK, that's a useful convention, actually.

Thanks,

	M.
Andrew Jones Nov. 6, 2017, 5:22 p.m. UTC | #4
On Thu, Oct 12, 2017 at 12:41:05PM +0200, Christoffer Dall wrote:
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This requires us to have a scratch register though, so we take the
> chance to rearrange some of the el1_sync code to only look at the
> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> the host.  We do add an extra check to call the panic code if the kernel
> is configured with debugging enabled and we saw a trap from the host
> which wasn't an HVC, indicating that we left some EL2 trap configured by
> mistake.
> 
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
>  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
>  arch/arm64/kernel/asm-offsets.c  |  1 +
>  arch/arm64/kvm/hyp/entry.S       |  5 +----
>  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
>  arch/arm64/kvm/hyp/switch.c      |  2 +-
>  5 files changed, 41 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index ab4d0a9..7e48a39 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
>  
>  #endif
>  
> +#ifdef __ASSEMBLY__
> +.macro get_host_ctxt reg, tmp
> +	/*
> +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> +	 * not be accessible by this address from EL2, hyp_panic() converts
> +	 * it with kern_hyp_va() before use.
> +	 */
> +	ldr	\reg, =kvm_host_cpu_state
> +	mrs	\tmp, tpidr_el2
> +	add	\reg, \reg, \tmp
> +	kern_hyp_va \reg
> +.endm
> +
> +.macro get_vcpu vcpu, ctxt
> +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> +	kern_hyp_va	\vcpu
> +.endm

To avoid the need for the pattern

  get_host_ctxt x0, x1
  get_vcpu      x1, x0

everywhere this macro is used, how about defining it as

 .macro get_vcpu vcpu, tmp
     get_host_ctxt \tmp, \vcpu    
     ldr     \vcpu, [\tmp, #HOST_CONTEXT_VCPU]
     kern_hyp_va     \vcpu
 .endm

which also has the side-effect of tmp being ctxt after the call.
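
With that definition each call site below collapses to a single macro,
e.g. el1_irq would become (sketch only):

 el1_irq:
 	stp	x0, x1, [sp, #-16]!
 	get_vcpu	x1, x0		// x1 = vcpu, x0 = host ctxt
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit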

> +
> +#endif
> +
>  #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 71bf088..612021d 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -135,6 +135,7 @@ int main(void)
>    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
>    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
>    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
>  #endif
>  #ifdef CONFIG_CPU_PM
>    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index 9a8ab5d..76cd48f 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
>  	// Store the host regs
>  	save_callee_saved_regs x1
>  
> -	// Store host_ctxt and vcpu for use at exit time
> -	stp	x1, x0, [sp, #-16]!
> -
>  	add	x18, x0, #VCPU_CONTEXT
>  
>  	// Restore guest regs x0-x17
> @@ -119,7 +116,7 @@ ENTRY(__guest_exit)
>  	save_callee_saved_regs x1
>  
>  	// Restore the host_ctxt from the stack
> -	ldr	x2, [sp], #16
> +	get_host_ctxt	x2, x3
>  
>  	// Now restore the host regs
>  	restore_callee_saved_regs x2
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index e4f37b9..2950f26 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -56,19 +56,16 @@ ENDPROC(__vhe_hyp_call)
>  el1_sync:				// Guest trapped into EL2
>  	stp	x0, x1, [sp, #-16]!
>  
> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> -	mrs	x1, esr_el2
> -alternative_else
> -	mrs	x1, esr_el1
> -alternative_endif
> -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> -
> -	cmp	x0, #ESR_ELx_EC_HVC64
> -	b.ne	el1_trap
> -
>  	mrs	x1, vttbr_el2		// If vttbr is valid, the 64bit guest
>  	cbnz	x1, el1_trap		// called HVC
>  
> +#ifdef CONFIG_DEBUG
> +	mrs	x0, esr_el2
> +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> +	cmp     x0, #ESR_ELx_EC_HVC64
> +	b.ne    __hyp_panic
> +#endif

The dropping of the "alternative_if_not ARM64_HAS_VIRT_HOST_EXTN" stuff
isn't called out in the commit message, but it looks like it was just 
a cleanup of code that was never necessary, as esr_el2 aliases esr_el1.
Is that correct?

> +
>  	/* Here, we're pretty sure the host called HVC. */
>  	ldp	x0, x1, [sp], #16
>  
> @@ -101,10 +98,15 @@ alternative_endif
>  	eret
>  
>  el1_trap:
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
> +
> +	mrs		x0, esr_el2
> +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
>  	/*
>  	 * x0: ESR_EC
> +	 * x1: vcpu pointer
>  	 */
> -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
>  
>  	/*
>  	 * We trap the first access to the FP/SIMD to save the host context
> @@ -122,13 +124,15 @@ alternative_else_nop_endif
>  
>  el1_irq:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
>  	mov	x0, #ARM_EXCEPTION_IRQ
>  	b	__guest_exit
>  
>  el1_error:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
>  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
>  	b	__guest_exit
>  
> @@ -164,14 +168,7 @@ ENTRY(__hyp_do_panic)
>  ENDPROC(__hyp_do_panic)
>  
>  ENTRY(__hyp_panic)
> -	/*
> -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> -	 * not be accessible by this address from EL2, hyp_panic() converts
> -	 * it with kern_hyp_va() before use.
> -	 */
> -	ldr	x0, =kvm_host_cpu_state
> -	mrs	x1, tpidr_el2
> -	add	x0, x0, x1
> +	get_host_ctxt x0, x1
>  	b	hyp_panic
>  ENDPROC(__hyp_panic)
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 69ef24a..a0123ad 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -435,7 +435,7 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
>  	if (read_sysreg(vttbr_el2)) {
>  		struct kvm_cpu_context *host_ctxt;
>  
> -		host_ctxt = kern_hyp_va(__host_ctxt);
> +		host_ctxt = __host_ctxt;
>  		vcpu = host_ctxt->__hyp_running_vcpu;
>  		__timer_disable_traps(vcpu);
>  		__deactivate_traps(vcpu);
> -- 
> 2.9.0
>

Thanks,
drew
Christoffer Dall Nov. 7, 2017, 8:24 a.m. UTC | #5
On Mon, Nov 06, 2017 at 06:22:51PM +0100, Andrew Jones wrote:
> On Thu, Oct 12, 2017 at 12:41:05PM +0200, Christoffer Dall wrote:
> > We already have the percpu area for the host cpu state, which points to
> > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > every context switch.  We can be a little more clever and just use
> > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > context.
> > 
> > This requires us to have a scratch register though, so we take the
> > chance to rearrange some of the el1_sync code to only look at the
> > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > the host.  We do add an extra check to call the panic code if the kernel
> > is configured with debugging enabled and we saw a trap from the host
> > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > mistake.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> >  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
> >  arch/arm64/kernel/asm-offsets.c  |  1 +
> >  arch/arm64/kvm/hyp/entry.S       |  5 +----
> >  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
> >  arch/arm64/kvm/hyp/switch.c      |  2 +-
> >  5 files changed, 41 insertions(+), 26 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index ab4d0a9..7e48a39 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
> >  
> >  #endif
> >  
> > +#ifdef __ASSEMBLY__
> > +.macro get_host_ctxt reg, tmp
> > +	/*
> > +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > +	 * not be accessible by this address from EL2, hyp_panic() converts
> > +	 * it with kern_hyp_va() before use.
> > +	 */
> > +	ldr	\reg, =kvm_host_cpu_state
> > +	mrs	\tmp, tpidr_el2
> > +	add	\reg, \reg, \tmp
> > +	kern_hyp_va \reg
> > +.endm
> > +
> > +.macro get_vcpu vcpu, ctxt
> > +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> > +	kern_hyp_va	\vcpu
> > +.endm
> 
> To avoid the need for the pattern
> 
>   get_host_ctxt x0, x1
>   get_vcpu      x1, x0
> 
> everywhere this macro is used, how about defining it as
> 
>  .macro get_vcpu vcpu, tmp
>      get_host_ctxt \tmp, \vcpu    
>      ldr     \vcpu, [\tmp, #HOST_CONTEXT_VCPU]
>      kern_hyp_va     \vcpu
>  .endm
> 
> which also has the side-effect of tmp being ctxt after the call.
> 

I actually prefer it the way it is now, because I think it's clearer
what's going on.  In the past we did some information hiding in assembly
macros and that didn't improve maintenance.

If there's an aspect to this I'm missing, I'm of course open to
reconsidering though.

> > +
> > +#endif
> > +
> >  #endif /* __ARM_KVM_ASM_H__ */
> > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> > index 71bf088..612021d 100644
> > --- a/arch/arm64/kernel/asm-offsets.c
> > +++ b/arch/arm64/kernel/asm-offsets.c
> > @@ -135,6 +135,7 @@ int main(void)
> >    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
> >    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
> >    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> > +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
> >  #endif
> >  #ifdef CONFIG_CPU_PM
> >    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > index 9a8ab5d..76cd48f 100644
> > --- a/arch/arm64/kvm/hyp/entry.S
> > +++ b/arch/arm64/kvm/hyp/entry.S
> > @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
> >  	// Store the host regs
> >  	save_callee_saved_regs x1
> >  
> > -	// Store host_ctxt and vcpu for use at exit time
> > -	stp	x1, x0, [sp, #-16]!
> > -
> >  	add	x18, x0, #VCPU_CONTEXT
> >  
> >  	// Restore guest regs x0-x17
> > @@ -119,7 +116,7 @@ ENTRY(__guest_exit)
> >  	save_callee_saved_regs x1
> >  
> >  	// Restore the host_ctxt from the stack
> > -	ldr	x2, [sp], #16
> > +	get_host_ctxt	x2, x3
> >  
> >  	// Now restore the host regs
> >  	restore_callee_saved_regs x2
> > diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> > index e4f37b9..2950f26 100644
> > --- a/arch/arm64/kvm/hyp/hyp-entry.S
> > +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> > @@ -56,19 +56,16 @@ ENDPROC(__vhe_hyp_call)
> >  el1_sync:				// Guest trapped into EL2
> >  	stp	x0, x1, [sp, #-16]!
> >  
> > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > -	mrs	x1, esr_el2
> > -alternative_else
> > -	mrs	x1, esr_el1
> > -alternative_endif
> > -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> > -
> > -	cmp	x0, #ESR_ELx_EC_HVC64
> > -	b.ne	el1_trap
> > -
> >  	mrs	x1, vttbr_el2		// If vttbr is valid, the 64bit guest
> >  	cbnz	x1, el1_trap		// called HVC
> >  
> > +#ifdef CONFIG_DEBUG
> > +	mrs	x0, esr_el2
> > +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> > +	cmp     x0, #ESR_ELx_EC_HVC64
> > +	b.ne    __hyp_panic
> > +#endif
> 
> The dropping of the "alternative_if_not ARM64_HAS_VIRT_HOST_EXTN" stuff
> isn't called out in the commit message, but it looks like it was just 
> a cleanup of code that was never necessary, as esr_el2 aliases esr_el1.
> Is that correct?
> 

I suppose 'rearranging some of the code' is not a very precise
description.  With VHE, the 'mrs x1, esr_el1' instruction will actually
read ESR_EL2 into x1, and the 'mrs x1, esr_el2' instruction will have
the same effect, so we might as well simply use the latter.

I'll adjust the commit message.


Thanks for looking at this!
-Christoffer

> > +
> >  	/* Here, we're pretty sure the host called HVC. */
> >  	ldp	x0, x1, [sp], #16
> >  
> > @@ -101,10 +98,15 @@ alternative_endif
> >  	eret
> >  
> >  el1_trap:
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> > +
> > +	mrs		x0, esr_el2
> > +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
> >  	/*
> >  	 * x0: ESR_EC
> > +	 * x1: vcpu pointer
> >  	 */
> > -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
> >  
> >  	/*
> >  	 * We trap the first access to the FP/SIMD to save the host context
> > @@ -122,13 +124,15 @@ alternative_else_nop_endif
> >  
> >  el1_irq:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> >  	mov	x0, #ARM_EXCEPTION_IRQ
> >  	b	__guest_exit
> >  
> >  el1_error:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> >  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
> >  	b	__guest_exit
> >  
> > @@ -164,14 +168,7 @@ ENTRY(__hyp_do_panic)
> >  ENDPROC(__hyp_do_panic)
> >  
> >  ENTRY(__hyp_panic)
> > -	/*
> > -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > -	 * not be accessible by this address from EL2, hyp_panic() converts
> > -	 * it with kern_hyp_va() before use.
> > -	 */
> > -	ldr	x0, =kvm_host_cpu_state
> > -	mrs	x1, tpidr_el2
> > -	add	x0, x0, x1
> > +	get_host_ctxt x0, x1
> >  	b	hyp_panic
> >  ENDPROC(__hyp_panic)
> >  
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 69ef24a..a0123ad 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -435,7 +435,7 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> >  	if (read_sysreg(vttbr_el2)) {
> >  		struct kvm_cpu_context *host_ctxt;
> >  
> > -		host_ctxt = kern_hyp_va(__host_ctxt);
> > +		host_ctxt = __host_ctxt;
> >  		vcpu = host_ctxt->__hyp_running_vcpu;
> >  		__timer_disable_traps(vcpu);
> >  		__deactivate_traps(vcpu);
> > -- 
> > 2.9.0
> >
Christoffer Dall Nov. 23, 2017, 8:59 p.m. UTC | #6
Hi Marc,

On Thu, Oct 12, 2017 at 04:49:44PM +0100, Marc Zyngier wrote:
> On 12/10/17 11:41, Christoffer Dall wrote:
> > We already have the percpu area for the host cpu state, which points to
> > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > every context switch.  We can be a little more clever and just use
> > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > context.
> > 
> > This requires us to have a scratch register though, so we take the
> > chance to rearrange some of the el1_sync code to only look at the
> > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > the host.  We do add an extra check to call the panic code if the kernel
> > is configured with debugging enabled and we saw a trap from the host
> > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > mistake.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> >  arch/arm64/include/asm/kvm_asm.h | 20 ++++++++++++++++++++
> >  arch/arm64/kernel/asm-offsets.c  |  1 +
> >  arch/arm64/kvm/hyp/entry.S       |  5 +----
> >  arch/arm64/kvm/hyp/hyp-entry.S   | 39 ++++++++++++++++++---------------------
> >  arch/arm64/kvm/hyp/switch.c      |  2 +-
> >  5 files changed, 41 insertions(+), 26 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index ab4d0a9..7e48a39 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
> >  
> >  #endif
> >  
> > +#ifdef __ASSEMBLY__
> > +.macro get_host_ctxt reg, tmp
> > +	/*
> > +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > +	 * not be accessible by this address from EL2, hyp_panic() converts
> > +	 * it with kern_hyp_va() before use.
> > +	 */
> 
> This really looks like a stale comment, as there is no hyp_panic
> involved here anymore (thankfully!).
> 
> > +	ldr	\reg, =kvm_host_cpu_state
> > +	mrs	\tmp, tpidr_el2
> > +	add	\reg, \reg, \tmp
> > +	kern_hyp_va \reg
> 
> Here, we're trading a load from the stack for a load from the constant
> pool. Can't we do something like:
> 
> 	adr_l	\reg, kvm_host_cpu_state
> 	msr	\tmp, tpidr_el2
> 	add	\reg, \reg, \tmp
> 
> and that's it? This relies on the property that the kernel/hyp offset is
> constant, and that it doesn't matter if we add the offset to a kernel VA
> or a HYP VA... Completely untested of course!
> 

Coming back to this one, annoyingly, it doesn't seem to work.  This is
the code I use for get_host_ctxt:

.macro get_host_ctxt reg, tmp
	adr_l	\reg, kvm_host_cpu_state
	mrs	\tmp, tpidr_el2
	add	\reg, \reg, \tmp
	kern_hyp_va \reg
.endm

And this is the disassembly for one of the uses in the hyp code:

	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
	add	x0, x0, #0x7f0
	mrs	x1, tpidr_el2
	add	x0, x0, x1
	and	x0, x0, #0xffffffffffff
	
For comparison, the following C-code:

	struct kvm_cpu_context *host_ctxt;
	host_ctxt = this_cpu_ptr(&kvm_host_cpu_state);
	host_ctxt = kern_hyp_va(host_ctxt);

Gets compiled into this:

	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
	add	x0, x0, #0x7d0
	mrs	x1, tpidr_el1
	add	x0, x0, #0x20
	add	x0, x0, x1
	and	x0, x0, #0xffffffffffff

And, during hyp init we do:
	mrs	x1, tpidr_el1
	msr	tpidr_el2, x1

Any ideas what could be going on here?

Thanks,
-Christoffer
James Morse Nov. 27, 2017, 11:11 a.m. UTC | #7
Hi Christoffer,

On 23/11/17 20:59, Christoffer Dall wrote:
> On Thu, Oct 12, 2017 at 04:49:44PM +0100, Marc Zyngier wrote:
>> On 12/10/17 11:41, Christoffer Dall wrote:
>>> We already have the percpu area for the host cpu state, which points to
>>> the VCPU, so there's no need to store the VCPU pointer on the stack on
>>> every context switch.  We can be a little more clever and just use
>>> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
>>> context.
>>>
>>> This requires us to have a scratch register though, so we take the
>>> chance to rearrange some of the el1_sync code to only look at the
>>> vttbr_el2 to determine if this is a trap from the guest or an HVC from
>>> the host.  We do add an extra check to call the panic code if the kernel
>>> is configured with debugging enabled and we saw a trap from the host
>>> which wasn't an HVC, indicating that we left some EL2 trap configured by
>>> mistake.

>>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>>> index ab4d0a9..7e48a39 100644
>>> --- a/arch/arm64/include/asm/kvm_asm.h
>>> +++ b/arch/arm64/include/asm/kvm_asm.h
>>> @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
>>>  
>>>  #endif
>>>  
>>> +#ifdef __ASSEMBLY__
>>> +.macro get_host_ctxt reg, tmp
>>> +	/*
>>> +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
>>> +	 * not be accessible by this address from EL2, hyp_panic() converts
>>> +	 * it with kern_hyp_va() before use.
>>> +	 */
>>
>> This really looks like a stale comment, as there is no hyp_panic
>> involved here anymore (thankfully!).
>>
>>> +	ldr	\reg, =kvm_host_cpu_state
>>> +	mrs	\tmp, tpidr_el2
>>> +	add	\reg, \reg, \tmp

This looks like the arch code's adr_this_cpu.


>>> +	kern_hyp_va \reg
>>
>> Here, we're trading a load from the stack for a load from the constant
>> pool. Can't we do something like:
>>
>> 	adr_l	\reg, kvm_host_cpu_state
>> 	msr	\tmp, tpidr_el2
>> 	add	\reg, \reg, \tmp
>>
>> and that's it? This relies on the property that the kernel/hyp offset is
>> constant, and that it doesn't matter if we add the offset to a kernel VA
>> or a HYP VA... Completely untested of course!
>>
> 
> Coming back to this one, annoyingly, it doesn't seem to work. 

The disassembly looks wrong, or it generates the wrong address?


> This is the code I use for get_host_ctxt:
> 
> .macro get_host_ctxt reg, tmp
> 	adr_l	\reg, kvm_host_cpu_state
> 	mrs	\tmp, tpidr_el2
> 	add	\reg, \reg, \tmp

(adr_this_cpu)

> 	kern_hyp_va \reg

As we know adr_l used adrp to generate a PC-relative address, when executed at
EL2 it should always generate an EL2 address, so the kern_hyp_va will just mask
out some bits that are already zero.

(this subtly depends on KVM's EL2 code not being a module, and
kvm_host_cpu_state not being percpu_alloc()d)
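
To make that concrete: the adrp trick only works because the variable is
a link-time symbol, roughly

	static DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);

whereas a hypothetical dynamic variant,

	kvm_cpu_context_t __percpu *state = alloc_percpu(kvm_cpu_context_t);

has no address adrp could target. (Sketch of the two allocation styles
only, not code from the series.)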


> .endm
> 
> And this is the disassembly for one of the uses in the hyp code:
> 
> 	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
> 	add	x0, x0, #0x7f0
> 	mrs	x1, tpidr_el2
> 	add	x0, x0, x1
> 	and	x0, x0, #0xffffffffffff

(that looks right to me).


> For comparison, the following C-code:
> 
> 	struct kvm_cpu_context *host_ctxt;
> 	host_ctxt = this_cpu_ptr(&kvm_host_cpu_state);
> 	host_ctxt = kern_hyp_va(host_ctxt);
> 
> Gets compiled into this:
> 
> 	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
> 	add	x0, x0, #0x7d0
> 	mrs	x1, tpidr_el1
> 	add	x0, x0, #0x20
> 	add	x0, x0, x1
> 	and	x0, x0, #0xffffffffffff

> Any ideas what could be going on here?

You expected tpidr_el2 in the above disassembly?

The patch 'arm64: alternatives: use tpidr_el2 on VHE hosts'[0] wraps the tpidr
access in adr_this_cpu, ldr_this_cpu and __my_cpu_offset() in
ARM64_HAS_VIRT_HOST_EXTN alternatives.

You should have an altinstr_replacement section that contains the 'mrs x1,
tpidr_el2' for this sequence, which will get patched in by the cpufeature code
when we find VHE.


I'm guessing you want to always use tpidr_el2 as cpu_offset for KVM, even on
v8.0 hardware. To do this you can't use the kernel's 'this_cpu_ptr' as it's
defined in percpu-defs.h as:
> SHIFT_PERCPU_PTR(ptr, my_cpu_offset)

... and the arch code provides a static-inline 'my_cpu_offset' that resolves to
the correct tpidr for EL1.

I guess you need an asm-accessor for each per-cpu variable you want to access,
or a kvm_this_per_cpu().
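
For comparison, the EL1 accessor that [0] patches looks roughly like
this (sketched from memory of arch/arm64/include/asm/percpu.h, details
may differ):

	static inline unsigned long __my_cpu_offset(void)
	{
		unsigned long off;

		/* The alternative patches in tpidr_el2 on VHE hosts */
		asm(ALTERNATIVE("mrs %0, tpidr_el1",
				"mrs %0, tpidr_el2",
				ARM64_HAS_VIRT_HOST_EXTN)
		    : "=r" (off) :
		    "Q" (*(const unsigned long *)current_stack_pointer));

		return off;
	}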


> And, during hyp init we do:
> 	mrs	x1, tpidr_el1
> 	msr	tpidr_el2, x1

In the SDEI series this was so that the asm that used tpidr_el2 directly had the
correct value on non-VHE hardware.


Thanks,

James


[0] https://patchwork.kernel.org/patch/10012641/
Christoffer Dall Nov. 29, 2017, 6:20 p.m. UTC | #8
Hi James,

On Mon, Nov 27, 2017 at 11:11:20AM +0000, James Morse wrote:
> On 23/11/17 20:59, Christoffer Dall wrote:
> > On Thu, Oct 12, 2017 at 04:49:44PM +0100, Marc Zyngier wrote:
> >> On 12/10/17 11:41, Christoffer Dall wrote:
> >>> We already have the percpu area for the host cpu state, which points to
> >>> the VCPU, so there's no need to store the VCPU pointer on the stack on
> >>> every context switch.  We can be a little more clever and just use
> >>> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> >>> context.
> >>>
> >>> This requires us to have a scratch register though, so we take the
> >>> chance to rearrange some of the el1_sync code to only look at the
> >>> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> >>> the host.  We do add an extra check to call the panic code if the kernel
> >>> is configured with debugging enabled and we saw a trap from the host
> >>> which wasn't an HVC, indicating that we left some EL2 trap configured by
> >>> mistake.
> 
> >>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> >>> index ab4d0a9..7e48a39 100644
> >>> --- a/arch/arm64/include/asm/kvm_asm.h
> >>> +++ b/arch/arm64/include/asm/kvm_asm.h
> >>> @@ -70,4 +70,24 @@ extern u32 __init_stage2_translation(void);
> >>>  
> >>>  #endif
> >>>  
> >>> +#ifdef __ASSEMBLY__
> >>> +.macro get_host_ctxt reg, tmp
> >>> +	/*
> >>> +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> >>> +	 * not be accessible by this address from EL2, hyp_panic() converts
> >>> +	 * it with kern_hyp_va() before use.
> >>> +	 */
> >>
> >> This really looks like a stale comment, as there is no hyp_panic
> >> involved here anymore (thankfully!).
> >>
> >>> +	ldr	\reg, =kvm_host_cpu_state
> >>> +	mrs	\tmp, tpidr_el2
> >>> +	add	\reg, \reg, \tmp
> 
> This looks like the arch code's adr_this_cpu.
> 
> 
> >>> +	kern_hyp_va \reg
> >>
> >> Here, we're trading a load from the stack for a load from the constant
> >> pool. Can't we do something like:
> >>
> >> 	adr_l	\reg, kvm_host_cpu_state
> >> 	msr	\tmp, tpidr_el2
> >> 	add	\reg, \reg, \tmp
> >>
> >> and that's it? This relies on the property that the kernel/hyp offset is
> >> constant, and that it doesn't matter if we add the offset to a kernel VA
> >> or a HYP VA... Completely untested of course!
> >>
> > 
> > Coming back to this one, annoyingly, it doesn't seem to work. 
> 
> The disassembly looks wrong, or it generates the wrong address?
> 

The assembly above was just something Marc suggested.  I think it's
wrong (it should be mrs, not msr in the second line), but I just took
it as inspiration, so that's not part of the problem at hand.  Sorry for
the confusion.

> 
> > This is the code I use for get_host_ctxt:
> > 
> > .macro get_host_ctxt reg, tmp
> > 	adr_l	\reg, kvm_host_cpu_state
> > 	mrs	\tmp, tpidr_el2
> > 	add	\reg, \reg, \tmp
> 
> (adr_this_cpu)
> 
> > 	kern_hyp_va \reg
> 
> As we know adr_l used adrp to generate a PC-relative address, when executed at
> EL2 it should always generate an EL2 address, so the kern_hyp_va will just mask
> out some bits that are already zero.

yes, that's right

> 
> (this subtly depends on KVM's EL2 code not being a module, and
> kvm_host_cpu_state not being percpu_alloc()d)
> 
> 

yes, and I have your "KVM: arm/arm64: Convert kvm_host_cpu_state to a
static per-cpu allocation" patch.

> > .endm
> > 
> > And this is the disassembly for one of the uses in the hyp code:
> > 
> > 	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
> > 	add	x0, x0, #0x7f0
> > 	mrs	x1, tpidr_el2
> > 	add	x0, x0, x1
> > 	and	x0, x0, #0xffffffffffff
> 
> (that looks right to me).
> 
> 

to me too, but it doesn't work :(

> > For comparison, the following C-code:
> > 
> > 	struct kvm_cpu_context *host_ctxt;
> > 	host_ctxt = this_cpu_ptr(&kvm_host_cpu_state);
> > 	host_ctxt = kern_hyp_va(host_ctxt);
> > 
> > Gets compiled into this:
> > 
> > 	adrp	x0, ffff000008ca9000 <overflow_stack+0xd20>
> > 	add	x0, x0, #0x7d0
> > 	mrs	x1, tpidr_el1
> > 	add	x0, x0, #0x20
> > 	add	x0, x0, x1
> > 	and	x0, x0, #0xffffffffffff
> 
> > Any ideas what could be going on here?
> 
> You expected tpidr_el2 in the above disassembly?

No, because I'm not on a VHE host, but I expect tpidr_el1 and tpidr_el2
to be the same in the hyp code.

I now realize that I never said that this breaks on a non-VHE host; I
haven't actually tried a VHE host, but it shouldn't matter.

> 
> The patch 'arm64: alternatives: use tpidr_el2 on VHE hosts'[0] wraps the tpidr
> access in adr_this_cpu, ldr_this_cpu and __my_cpu_offset() in
> ARM64_HAS_VIRT_HOST_EXTN alternatives.
> 
> You should have an altinstr_replacement section that contains the 'mrs x1,
> tpidr_el2' for this sequence, which will get patched in by the cpufeature code
> when we find VHE.
> 

Yes, I think all that is fine.

> 
> I'm guessing you want to always use tpidr_el2 as cpu_offset for KVM, even on
> v8.0 hardware. To do this you can't use the kernel's 'this_cpu_ptr' as its
> defined in percpu-defs.h as:
> > SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
> 
> ... and the arch code provides a static-inline 'my_cpu_offset' that resolves to
> the correct tpidr for EL1.
> 
> I guess you need an asm-accessor for each per-cpu variable you want to access,
> or a kvm_this_per_cpu().
> 

I was under the impression that we were essentially open-coding this
functionality with the assembly above.  What did I miss?

> 
> > And, during hyp init we do:
> > 	mrs	x1, tpidr_el1
> > 	msr	tpidr_el2, x1
> 
> In the SDEI series this was so that the asm that used tpidr_el2 directly had the
> correct value on non-VHE hardware.
> 
> 
Yes, and I simply generalized that bit of assembly (the hyp panic logic)
which also needed the vcpu context to all the assembly that needs the
vcpu context.

And it works fine with a load from the constant pool and the mask, but
not with the open-coded this_cpu_ptr() at EL2.  On a non-VHE system.
Even though the assembly seems identical, and it should just work (TM).

Thoughts?

Thanks,
-Christoffer

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ab4d0a9..7e48a39 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -70,4 +70,24 @@  extern u32 __init_stage2_translation(void);
 
 #endif
 
+#ifdef __ASSEMBLY__
+.macro get_host_ctxt reg, tmp
+	/*
+	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
+	 * not be accessible by this address from EL2, hyp_panic() converts
+	 * it with kern_hyp_va() before use.
+	 */
+	ldr	\reg, =kvm_host_cpu_state
+	mrs	\tmp, tpidr_el2
+	add	\reg, \reg, \tmp
+	kern_hyp_va \reg
+.endm
+
+.macro get_vcpu vcpu, ctxt
+	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+	kern_hyp_va	\vcpu
+.endm
+
+#endif
+
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088..612021d 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -135,6 +135,7 @@  int main(void)
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
   DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 9a8ab5d..76cd48f 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@  ENTRY(__guest_enter)
 	// Store the host regs
 	save_callee_saved_regs x1
 
-	// Store host_ctxt and vcpu for use at exit time
-	stp	x1, x0, [sp, #-16]!
-
 	add	x18, x0, #VCPU_CONTEXT
 
 	// Restore guest regs x0-x17
@@ -119,7 +116,7 @@  ENTRY(__guest_exit)
 	save_callee_saved_regs x1
 
 	// Restore the host_ctxt from the stack
-	ldr	x2, [sp], #16
+	get_host_ctxt	x2, x3
 
 	// Now restore the host regs
 	restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index e4f37b9..2950f26 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -56,19 +56,16 @@  ENDPROC(__vhe_hyp_call)
 el1_sync:				// Guest trapped into EL2
 	stp	x0, x1, [sp, #-16]!
 
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x1, esr_el2
-alternative_else
-	mrs	x1, esr_el1
-alternative_endif
-	lsr	x0, x1, #ESR_ELx_EC_SHIFT
-
-	cmp	x0, #ESR_ELx_EC_HVC64
-	b.ne	el1_trap
-
 	mrs	x1, vttbr_el2		// If vttbr is valid, the 64bit guest
 	cbnz	x1, el1_trap		// called HVC
 
+#ifdef CONFIG_DEBUG
+	mrs	x0, esr_el2
+	lsr	x0, x0, #ESR_ELx_EC_SHIFT
+	cmp     x0, #ESR_ELx_EC_HVC64
+	b.ne    __hyp_panic
+#endif
+
 	/* Here, we're pretty sure the host called HVC. */
 	ldp	x0, x1, [sp], #16
 
@@ -101,10 +98,15 @@  alternative_endif
 	eret
 
 el1_trap:
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
+
+	mrs		x0, esr_el2
+	lsr		x0, x0, #ESR_ELx_EC_SHIFT
 	/*
 	 * x0: ESR_EC
+	 * x1: vcpu pointer
 	 */
-	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
 
 	/*
 	 * We trap the first access to the FP/SIMD to save the host context
@@ -122,13 +124,15 @@  alternative_else_nop_endif
 
 el1_irq:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
 
 el1_error:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
@@ -164,14 +168,7 @@  ENTRY(__hyp_do_panic)
 ENDPROC(__hyp_do_panic)
 
 ENTRY(__hyp_panic)
-	/*
-	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
-	 * not be accessible by this address from EL2, hyp_panic() converts
-	 * it with kern_hyp_va() before use.
-	 */
-	ldr	x0, =kvm_host_cpu_state
-	mrs	x1, tpidr_el2
-	add	x0, x0, x1
+	get_host_ctxt x0, x1
 	b	hyp_panic
 ENDPROC(__hyp_panic)
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 69ef24a..a0123ad 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -435,7 +435,7 @@  void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
 	if (read_sysreg(vttbr_el2)) {
 		struct kvm_cpu_context *host_ctxt;
 
-		host_ctxt = kern_hyp_va(__host_ctxt);
+		host_ctxt = __host_ctxt;
 		vcpu = host_ctxt->__hyp_running_vcpu;
 		__timer_disable_traps(vcpu);
 		__deactivate_traps(vcpu);