
[v4,03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

Message ID 20180215210332.8648-4-christoffer.dall@linaro.org (mailing list archive)
State New, archived

Commit Message

Christoffer Dall Feb. 15, 2018, 9:02 p.m. UTC
We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This does require us to calculate the percpu offset without including
the offset from the kernel mapping of the percpu array to the linear
mapping of the array (which is what we store in tpidr_el1), because a
PC-relative generated address in EL2 is already giving us the hyp alias
of the linear mapping of a kernel address.  We do this in
__cpu_init_hyp_mode() by using kvm_ksym_ref().

This change also requires us to have a scratch register, so we take the
chance to rearrange some of the el1_sync code to only look at the
vttbr_el2 to determine if this is a trap from the guest or an HVC from
the host.  We do add an extra check to call the panic code if the kernel
is configured with debugging enabled and we saw a trap from the host
which wasn't an HVC, indicating that we left some EL2 trap configured by
mistake.

The code that accesses ESR_EL2 was previously using an alternative to
use the _EL1 accessor on VHE systems, but this was actually unnecessary
as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
accessor does the same thing on both systems.

Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---

Notes:
    Changes since v3:
     - Reworked the assembly part of the patch after rebasing on v4.16-rc1
       which created a conflict with the variant 2 mitigations.
     - Removed Marc's reviewed-by due to the rework.
     - Removed unneeded extern keyword in declaration in header file
    
    Changes since v1:
     - Use PC-relative addressing to access per-cpu variables instead of
       using a load from the literal pool.
     - Remove stale comments as pointed out by Marc
     - Reworded the commit message as suggested by Drew

 arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
 arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
 arch/arm64/kernel/asm-offsets.c   |  1 +
 arch/arm64/kvm/hyp/entry.S        |  6 +-----
 arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
 arch/arm64/kvm/hyp/switch.c       |  5 +----
 arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
 7 files changed, 50 insertions(+), 27 deletions(-)

Comments

Julien Grall Feb. 19, 2018, 3:50 p.m. UTC | #1
Hi Christoffer,

On 15/02/18 21:02, Christoffer Dall wrote:
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This does require us to calculate the percpu offset without including
> the offset from the kernel mapping of the percpu array to the linear
> mapping of the array (which is what we store in tpidr_el1), because a
> PC-relative generated address in EL2 is already giving us the hyp alias
> of the linear mapping of a kernel address.  We do this in
> __cpu_init_hyp_mode() by using kvm_ksym_ref().
> 
> This change also requires us to have a scratch register, so we take the
> chance to rearrange some of the el1_sync code to only look at the
> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> the host.  We do add an extra check to call the panic code if the kernel
> is configured with debugging enabled and we saw a trap from the host
> which wasn't an HVC, indicating that we left some EL2 trap configured by
> mistake.

You might want to remove this paragraph as you don't seem to have reworked 
that part of the code in this version.

Cheers,
Marc Zyngier Feb. 21, 2018, 11:34 a.m. UTC | #2
On Thu, 15 Feb 2018 21:02:55 +0000,
Christoffer Dall wrote:
> 
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This does require us to calculate the percpu offset without including
> the offset from the kernel mapping of the percpu array to the linear
> mapping of the array (which is what we store in tpidr_el1), because a
> PC-relative generated address in EL2 is already giving us the hyp alias
> of the linear mapping of a kernel address.  We do this in
> __cpu_init_hyp_mode() by using kvm_ksym_ref().
> 
> This change also requires us to have a scratch register, so we take the
> chance to rearrange some of the el1_sync code to only look at the
> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> the host.  We do add an extra check to call the panic code if the kernel
> is configured with debugging enabled and we saw a trap from the host
> which wasn't an HVC, indicating that we left some EL2 trap configured by
> mistake.
> 
> The code that accesses ESR_EL2 was previously using an alternative to
> use the _EL1 accessor on VHE systems, but this was actually unnecessary
> as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> accessor does the same thing on both systems.
> 
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
> 
> Notes:
>     Changes since v3:
>      - Reworked the assembly part of the patch after rebasing on v4.16-rc1
>        which created a conflict with the variant 2 mitigations.
>      - Removed Marc's reviewed-by due to the rework.
>      - Removed unneeded extern keyword in declaration in header file
>     
>     Changes since v1:
>      - Use PC-relative addressing to access per-cpu variables instead of
>        using a load from the literal pool.
>      - Remove stale comments as pointed out by Marc
>      - Reworded the commit message as suggested by Drew
> 
>  arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
>  arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
>  arch/arm64/kernel/asm-offsets.c   |  1 +
>  arch/arm64/kvm/hyp/entry.S        |  6 +-----
>  arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
>  arch/arm64/kvm/hyp/switch.c       |  5 +----
>  arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
>  7 files changed, 50 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 24961b732e65..6b626750b0a1 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -33,6 +33,7 @@
>  #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
>  #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
>  
> +/* Translate a kernel address of @sym into its equivalent linear mapping */
>  #define kvm_ksym_ref(sym)						\
>  	({								\
>  		void *val = &sym;					\
> @@ -70,6 +71,19 @@ extern u32 __init_stage2_translation(void);
>  
>  extern void __qcom_hyp_sanitize_btac_predictors(void);
>  
> +#else /* __ASSEMBLY__ */
> +
> +.macro get_host_ctxt reg, tmp
> +	adr_l	\reg, kvm_host_cpu_state
> +	mrs	\tmp, tpidr_el2
> +	add	\reg, \reg, \tmp
> +.endm
> +
> +.macro get_vcpu vcpu, ctxt
> +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> +	kern_hyp_va	\vcpu
> +.endm
> +
>  #endif
>  
>  #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 596f8e414a4c..618cfee7206a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
>  
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>  
> +void __kvm_set_tpidr_el2(u64 tpidr_el2);
> +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
> +
>  static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
>  				       unsigned long hyp_stack_ptr,
>  				       unsigned long vector_ptr)
>  {
> +	u64 tpidr_el2;
> +
>  	/*
>  	 * Call initialization code, and switch to the full blown HYP code.
>  	 * If the cpucaps haven't been finalized yet, something has gone very
> @@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
>  	 */
>  	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
>  	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
> +
> +	/*
> +	 * Calculate the raw per-cpu offset without a translation from the
> +	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
> +	 * so that we can use adr_l to access per-cpu variables in EL2.
> +	 */
> +	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
> +		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
> +
> +	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
>  }
>  
>  static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 1303e04110cd..78e1b0a70aaf 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -138,6 +138,7 @@ int main(void)
>    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
>    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
>    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
>  #endif
>  #ifdef CONFIG_CPU_PM
>    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index fdd1068ee3a5..1f458f7c3b44 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
>  	// Store the host regs
>  	save_callee_saved_regs x1
>  
> -	// Store host_ctxt and vcpu for use at exit time
> -	stp	x1, x0, [sp, #-16]!
> -
>  	add	x18, x0, #VCPU_CONTEXT
>  
>  	// Restore guest regs x0-x17
> @@ -118,8 +115,7 @@ ENTRY(__guest_exit)
>  	// Store the guest regs x19-x29, lr
>  	save_callee_saved_regs x1
>  
> -	// Restore the host_ctxt from the stack
> -	ldr	x2, [sp], #16
> +	get_host_ctxt	x2, x3
>  
>  	// Now restore the host regs
>  	restore_callee_saved_regs x2
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index f36464bd57c5..559b4d54bc42 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -57,13 +57,8 @@ ENDPROC(__vhe_hyp_call)
>  el1_sync:				// Guest trapped into EL2
>  	stp	x0, x1, [sp, #-16]!
>  
> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> -	mrs	x1, esr_el2
> -alternative_else
> -	mrs	x1, esr_el1
> -alternative_endif
> -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> -
> +	mrs	x0, esr_el2
> +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
>  	cmp	x0, #ESR_ELx_EC_HVC64
>  	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
>  	b.ne	el1_trap
> @@ -117,10 +112,15 @@ el1_hvc_guest:
>  	eret
>  
>  el1_trap:
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
> +
> +	mrs		x0, esr_el2
> +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
>  	/*
>  	 * x0: ESR_EC
> +	 * x1: vcpu pointer
>  	 */
> -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
>  
>  	/*
>  	 * We trap the first access to the FP/SIMD to save the host context
> @@ -138,13 +138,15 @@ alternative_else_nop_endif
>  
>  el1_irq:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0
>  	mov	x0, #ARM_EXCEPTION_IRQ
>  	b	__guest_exit
>  
>  el1_error:
>  	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_host_ctxt	x0, x1
> +	get_vcpu	x1, x0

Given how frequent this construct is, would there be a benefit in
having something like "get_vcpu_ptr" that conflates the two macros? We
don't seem to have a single case of using get_vcpu on its own.

>  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
>  	b	__guest_exit
>  
> @@ -180,14 +182,7 @@ ENTRY(__hyp_do_panic)
>  ENDPROC(__hyp_do_panic)
>  
>  ENTRY(__hyp_panic)
> -	/*
> -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> -	 * not be accessible by this address from EL2, hyp_panic() converts
> -	 * it with kern_hyp_va() before use.
> -	 */
> -	ldr	x0, =kvm_host_cpu_state
> -	mrs	x1, tpidr_el2
> -	add	x0, x0, x1
> +	get_host_ctxt x0, x1
>  	b	hyp_panic
>  ENDPROC(__hyp_panic)
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index b991f85c1133..d1749fa0bfc3 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -467,7 +467,7 @@ static hyp_alternate_select(__hyp_call_panic,
>  			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
>  			    ARM64_HAS_VIRT_HOST_EXTN);
>  
> -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
>  {
>  	struct kvm_vcpu *vcpu = NULL;
>  
> @@ -476,9 +476,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
>  	u64 par = read_sysreg(par_el1);
>  
>  	if (read_sysreg(vttbr_el2)) {
> -		struct kvm_cpu_context *host_ctxt;
> -
> -		host_ctxt = kern_hyp_va(__host_ctxt);
>  		vcpu = host_ctxt->__hyp_running_vcpu;
>  		__timer_disable_traps(vcpu);
>  		__deactivate_traps(vcpu);
> diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> index 2c17afd2be96..43b7dd65e3e6 100644
> --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> @@ -189,3 +189,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
>  	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
>  		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
>  }
> +
> +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
> +{
> +	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));

The paranoid in me says that we'd want an ISB here if we can possibly
use tpidr_el2 on this path. If we had to panic, for example...

> +}
> -- 
> 2.14.2
> 

The above notwithstanding,

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>

	M.
Andrew Jones Feb. 21, 2018, 5:32 p.m. UTC | #3
On Thu, Feb 15, 2018 at 10:02:55PM +0100, Christoffer Dall wrote:
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This does require us to calculate the percpu offset without including
> the offset from the kernel mapping of the percpu array to the linear
> mapping of the array (which is what we store in tpidr_el1), because a
> PC-relative generated address in EL2 is already giving us the hyp alias
> of the linear mapping of a kernel address.  We do this in
> __cpu_init_hyp_mode() by using kvm_ksym_ref().
> 
> This change also requires us to have a scratch register, so we take the
> chance to rearrange some of the el1_sync code to only look at the
> vttbr_el2 to determine if this is a trap from the guest or an HVC from
> the host.  We do add an extra check to call the panic code if the kernel
> is configured with debugging enabled and we saw a trap from the host
> which wasn't an HVC, indicating that we left some EL2 trap configured by
> mistake.
> 
> The code that accesses ESR_EL2 was previously using an alternative to
> use the _EL1 accessor on VHE systems, but this was actually unnecessary
> as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> accessor does the same thing on both systems.
> 
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
> 
> Notes:
>     Changes since v3:
>      - Reworked the assembly part of the patch after rebasing on v4.16-rc1
>        which created a conflict with the variant 2 mitigations.
>      - Removed Marc's reviewed-by due to the rework.
>      - Removed unneeded extern keyword in declaration in header file
>     
>     Changes since v1:
>      - Use PC-relative addressing to access per-cpu variables instead of
>        using a load from the literal pool.
>      - Remove stale comments as pointed out by Marc
>      - Reworded the commit message as suggested by Drew
> 
>  arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
>  arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
>  arch/arm64/kernel/asm-offsets.c   |  1 +
>  arch/arm64/kvm/hyp/entry.S        |  6 +-----
>  arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
>  arch/arm64/kvm/hyp/switch.c       |  5 +----
>  arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
>  7 files changed, 50 insertions(+), 27 deletions(-)
> 

I'm not clear on the motivation for this patch. I assumed it enabled
simpler patches later in the series, but I did a bit of reading ahead
and didn't see anything obvious. I doubt it gives a speedup, so is it
just to avoid stack use? Making it easier to maintain these assembly
functions that span a couple files? If so, should it be posted separately
from this series? If not, could you please add some more text to the
commit message helping me better understand the full motivation?

Besides my confusion on motivation, it looks good to me

Reviewed-by: Andrew Jones <drjones@redhat.com>

Thanks,
drew
Christoffer Dall Feb. 22, 2018, 8:43 a.m. UTC | #4
On Mon, Feb 19, 2018 at 03:50:20PM +0000, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:02, Christoffer Dall wrote:
> >We already have the percpu area for the host cpu state, which points to
> >the VCPU, so there's no need to store the VCPU pointer on the stack on
> >every context switch.  We can be a little more clever and just use
> >tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> >context.
> >
> >This does require us to calculate the percpu offset without including
> >the offset from the kernel mapping of the percpu array to the linear
> >mapping of the array (which is what we store in tpidr_el1), because a
> >PC-relative generated address in EL2 is already giving us the hyp alias
> >of the linear mapping of a kernel address.  We do this in
> >__cpu_init_hyp_mode() by using kvm_ksym_ref().
> >
> >This change also requires us to have a scratch register, so we take the
> >chance to rearrange some of the el1_sync code to only look at the
> >vttbr_el2 to determine if this is a trap from the guest or an HVC from
> >the host.  We do add an extra check to call the panic code if the kernel
> >is configured with debugging enabled and we saw a trap from the host
> >which wasn't an HVC, indicating that we left some EL2 trap configured by
> >mistake.
> 
> You might want to remove this paragraph as you don't seem to have reworked
> that part of the code in this version.
> 
Yes, will do.

Thanks,
-Christoffer
Christoffer Dall Feb. 22, 2018, 9:02 a.m. UTC | #5
On Wed, Feb 21, 2018 at 11:34:07AM +0000, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:02:55 +0000,
> Christoffer Dall wrote:
> > 
> > We already have the percpu area for the host cpu state, which points to
> > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > every context switch.  We can be a little more clever and just use
> > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > context.
> > 
> > This does require us to calculate the percpu offset without including
> > the offset from the kernel mapping of the percpu array to the linear
> > mapping of the array (which is what we store in tpidr_el1), because a
> > PC-relative generated address in EL2 is already giving us the hyp alias
> > of the linear mapping of a kernel address.  We do this in
> > __cpu_init_hyp_mode() by using kvm_ksym_ref().
> > 
> > This change also requires us to have a scratch register, so we take the
> > chance to rearrange some of the el1_sync code to only look at the
> > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > the host.  We do add an extra check to call the panic code if the kernel
> > is configured with debugging enabled and we saw a trap from the host
> > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > mistake.
> > 
> > The code that accesses ESR_EL2 was previously using an alternative to
> > use the _EL1 accessor on VHE systems, but this was actually unnecessary
> > as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> > accessor does the same thing on both systems.
> > 
> > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> > 
> > Notes:
> >     Changes since v3:
> >      - Reworked the assembly part of the patch after rebasing on v4.16-rc1
> >        which created a conflict with the variant 2 mitigations.
> >      - Removed Marc's reviewed-by due to the rework.
> >      - Removed unneeded extern keyword in declaration in header file
> >     
> >     Changes since v1:
> >      - Use PC-relative addressing to access per-cpu variables instead of
> >        using a load from the literal pool.
> >      - Remove stale comments as pointed out by Marc
> >      - Reworded the commit message as suggested by Drew
> > 
> >  arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
> >  arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
> >  arch/arm64/kernel/asm-offsets.c   |  1 +
> >  arch/arm64/kvm/hyp/entry.S        |  6 +-----
> >  arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
> >  arch/arm64/kvm/hyp/switch.c       |  5 +----
> >  arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
> >  7 files changed, 50 insertions(+), 27 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index 24961b732e65..6b626750b0a1 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -33,6 +33,7 @@
> >  #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
> >  #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
> >  
> > +/* Translate a kernel address of @sym into its equivalent linear mapping */
> >  #define kvm_ksym_ref(sym)						\
> >  	({								\
> >  		void *val = &sym;					\
> > @@ -70,6 +71,19 @@ extern u32 __init_stage2_translation(void);
> >  
> >  extern void __qcom_hyp_sanitize_btac_predictors(void);
> >  
> > +#else /* __ASSEMBLY__ */
> > +
> > +.macro get_host_ctxt reg, tmp
> > +	adr_l	\reg, kvm_host_cpu_state
> > +	mrs	\tmp, tpidr_el2
> > +	add	\reg, \reg, \tmp
> > +.endm
> > +
> > +.macro get_vcpu vcpu, ctxt
> > +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> > +	kern_hyp_va	\vcpu
> > +.endm
> > +
> >  #endif
> >  
> >  #endif /* __ARM_KVM_ASM_H__ */
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 596f8e414a4c..618cfee7206a 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
> >  
> >  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> >  
> > +void __kvm_set_tpidr_el2(u64 tpidr_el2);
> > +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
> > +
> >  static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
> >  				       unsigned long hyp_stack_ptr,
> >  				       unsigned long vector_ptr)
> >  {
> > +	u64 tpidr_el2;
> > +
> >  	/*
> >  	 * Call initialization code, and switch to the full blown HYP code.
> >  	 * If the cpucaps haven't been finalized yet, something has gone very
> > @@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
> >  	 */
> >  	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
> >  	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
> > +
> > +	/*
> > +	 * Calculate the raw per-cpu offset without a translation from the
> > +	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
> > +	 * so that we can use adr_l to access per-cpu variables in EL2.
> > +	 */
> > +	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
> > +		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
> > +
> > +	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
> >  }
> >  
> >  static inline void kvm_arch_hardware_unsetup(void) {}
> > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> > index 1303e04110cd..78e1b0a70aaf 100644
> > --- a/arch/arm64/kernel/asm-offsets.c
> > +++ b/arch/arm64/kernel/asm-offsets.c
> > @@ -138,6 +138,7 @@ int main(void)
> >    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
> >    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
> >    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> > +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
> >  #endif
> >  #ifdef CONFIG_CPU_PM
> >    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > index fdd1068ee3a5..1f458f7c3b44 100644
> > --- a/arch/arm64/kvm/hyp/entry.S
> > +++ b/arch/arm64/kvm/hyp/entry.S
> > @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
> >  	// Store the host regs
> >  	save_callee_saved_regs x1
> >  
> > -	// Store host_ctxt and vcpu for use at exit time
> > -	stp	x1, x0, [sp, #-16]!
> > -
> >  	add	x18, x0, #VCPU_CONTEXT
> >  
> >  	// Restore guest regs x0-x17
> > @@ -118,8 +115,7 @@ ENTRY(__guest_exit)
> >  	// Store the guest regs x19-x29, lr
> >  	save_callee_saved_regs x1
> >  
> > -	// Restore the host_ctxt from the stack
> > -	ldr	x2, [sp], #16
> > +	get_host_ctxt	x2, x3
> >  
> >  	// Now restore the host regs
> >  	restore_callee_saved_regs x2
> > diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> > index f36464bd57c5..559b4d54bc42 100644
> > --- a/arch/arm64/kvm/hyp/hyp-entry.S
> > +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> > @@ -57,13 +57,8 @@ ENDPROC(__vhe_hyp_call)
> >  el1_sync:				// Guest trapped into EL2
> >  	stp	x0, x1, [sp, #-16]!
> >  
> > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > -	mrs	x1, esr_el2
> > -alternative_else
> > -	mrs	x1, esr_el1
> > -alternative_endif
> > -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> > -
> > +	mrs	x0, esr_el2
> > +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> >  	cmp	x0, #ESR_ELx_EC_HVC64
> >  	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
> >  	b.ne	el1_trap
> > @@ -117,10 +112,15 @@ el1_hvc_guest:
> >  	eret
> >  
> >  el1_trap:
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> > +
> > +	mrs		x0, esr_el2
> > +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
> >  	/*
> >  	 * x0: ESR_EC
> > +	 * x1: vcpu pointer
> >  	 */
> > -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
> >  
> >  	/*
> >  	 * We trap the first access to the FP/SIMD to save the host context
> > @@ -138,13 +138,15 @@ alternative_else_nop_endif
> >  
> >  el1_irq:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> >  	mov	x0, #ARM_EXCEPTION_IRQ
> >  	b	__guest_exit
> >  
> >  el1_error:
> >  	stp     x0, x1, [sp, #-16]!
> > -	ldr	x1, [sp, #16 + 8]
> > +	get_host_ctxt	x0, x1
> > +	get_vcpu	x1, x0
> 
> Given how frequent this construct is, would there be a benefit in
> having something like "get_vcpu_ptr" that conflates the two macros? We
> don't seem to have a single case of using get_vcpu on its own.
> 

I think my intention was to make it obvious how we get to the vcpu
pointer, but looking at it now I don't think this adds anything, so I'm
happy to adjust.  How about adding a get_vcpu_ptr macro which calls the
other two macros?
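
Something along these lines, just composing the two macros from this
patch (untested sketch; the name is only a suggestion):

.macro get_vcpu_ptr vcpu, ctxt		// untested sketch
	get_host_ctxt	\ctxt, \vcpu	// \vcpu doubles as the scratch register
	get_vcpu	\vcpu, \ctxt
.endm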

> >  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
> >  	b	__guest_exit
> >  
> > @@ -180,14 +182,7 @@ ENTRY(__hyp_do_panic)
> >  ENDPROC(__hyp_do_panic)
> >  
> >  ENTRY(__hyp_panic)
> > -	/*
> > -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > -	 * not be accessible by this address from EL2, hyp_panic() converts
> > -	 * it with kern_hyp_va() before use.
> > -	 */
> > -	ldr	x0, =kvm_host_cpu_state
> > -	mrs	x1, tpidr_el2
> > -	add	x0, x0, x1
> > +	get_host_ctxt x0, x1
> >  	b	hyp_panic
> >  ENDPROC(__hyp_panic)
> >  
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index b991f85c1133..d1749fa0bfc3 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -467,7 +467,7 @@ static hyp_alternate_select(__hyp_call_panic,
> >  			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
> >  			    ARM64_HAS_VIRT_HOST_EXTN);
> >  
> > -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> > +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
> >  {
> >  	struct kvm_vcpu *vcpu = NULL;
> >  
> > @@ -476,9 +476,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> >  	u64 par = read_sysreg(par_el1);
> >  
> >  	if (read_sysreg(vttbr_el2)) {
> > -		struct kvm_cpu_context *host_ctxt;
> > -
> > -		host_ctxt = kern_hyp_va(__host_ctxt);
> >  		vcpu = host_ctxt->__hyp_running_vcpu;
> >  		__timer_disable_traps(vcpu);
> >  		__deactivate_traps(vcpu);
> > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> > index 2c17afd2be96..43b7dd65e3e6 100644
> > --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> > @@ -189,3 +189,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
> >  	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
> >  		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
> >  }
> > +
> > +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
> > +{
> > +	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
> 
> The paranoid in me says that we'd want an ISB here if we can possibly
> use tpidr_el2 on this path. If we had to panic, for example...
> 

I'm not sure I follow.  Are you not guaranteed that an mrs after msr
would reflect the latest written value, even without an ISB?

> > +}
> > -- 
> > 2.14.2
> > 
> 
> The above notwithstanding,
> 
> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> 
Thanks!
-Christoffer
Christoffer Dall Feb. 22, 2018, 9:10 a.m. UTC | #6
On Wed, Feb 21, 2018 at 06:32:00PM +0100, Andrew Jones wrote:
> On Thu, Feb 15, 2018 at 10:02:55PM +0100, Christoffer Dall wrote:
> > We already have the percpu area for the host cpu state, which points to
> > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > every context switch.  We can be a little more clever and just use
> > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > context.
> > 
> > This does require us to calculate the percpu offset without including
> > the offset from the kernel mapping of the percpu array to the linear
> > mapping of the array (which is what we store in tpidr_el1), because a
> > PC-relative generated address in EL2 is already giving us the hyp alias
> > of the linear mapping of a kernel address.  We do this in
> > __cpu_init_hyp_mode() by using kvm_ksym_ref().
> > 
> > This change also requires us to have a scratch register, so we take the
> > chance to rearrange some of the el1_sync code to only look at the
> > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > the host.  We do add an extra check to call the panic code if the kernel
> > is configured with debugging enabled and we saw a trap from the host
> > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > mistake.
> > 
> > The code that accesses ESR_EL2 was previously using an alternative to
> > use the _EL1 accessor on VHE systems, but this was actually unnecessary
> > as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> > accessor does the same thing on both systems.
> > 
> > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> > 
> > Notes:
> >     Changes since v3:
> >      - Reworked the assembly part of the patch after rebasing on v4.16-rc1
> >        which created a conflict with the variant 2 mitigations.
> >      - Removed Marc's reviewed-by due to the rework.
> >      - Removed unneeded extern keyword in declaration in header file
> >     
> >     Changes since v1:
> >      - Use PC-relative addressing to access per-cpu variables instead of
> >        using a load from the literal pool.
> >      - Remove stale comments as pointed out by Marc
> >      - Reworded the commit message as suggested by Drew
> > 
> >  arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
> >  arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
> >  arch/arm64/kernel/asm-offsets.c   |  1 +
> >  arch/arm64/kvm/hyp/entry.S        |  6 +-----
> >  arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
> >  arch/arm64/kvm/hyp/switch.c       |  5 +----
> >  arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
> >  7 files changed, 50 insertions(+), 27 deletions(-)
> > 
> 
> I'm not clear on the motivation for this patch. I assumed it enabled
> simpler patches later in the series, but I did a bit of reading ahead
> and didn't see anything obvious. I doubt it gives a speedup, so is it
> just to avoid stack use? Making it easier to maintain these assembly
> functions that span a couple files? If so, should it be posted separately
> from this series? If not, could you please add some more text to the
> commit message helping me better understand the full motivation?

In the past we've had difficulties debugging things where we messed up
the stack, because we then couldn't get back to the normal world and had
no reliable way to get the vcpu pointer.  That was the rationale for
storing the vcpu in a register as opposed to on the stack before.

We only recently changed that so that we could use tpidr_el2 to access
per-CPU variables.  Given that the vcpu pointer can already be found via
per-CPU variables, it makes sense to do that.

In terms of performance, your argument can be applied in isolation to
most patches in this series, and that was the initial approach I took in
the optimization work, only optimizing things that appeared significant
and would likely result in significant changes.  The results were
disappointing.  It was only when I included every micro-optimization for
the critical path that I could think of that we were able to observe
order-of-magnitude improvements for some workloads.  Subsequent
measurements confirmed this: it was hard to measure individual benefits
from each patch, but overall the changes matter.

> 
> Besides my confusion on motivation, it looks good to me
> 

In that case, unless there's an argument that the code has become too
hard to understand, ...

> Reviewed-by: Andrew Jones <drjones@redhat.com>
> 
...then thanks!
-Christoffer
Marc Zyngier Feb. 22, 2018, 9:35 a.m. UTC | #7
On Thu, 22 Feb 2018 09:02:48 +0000,
Christoffer Dall wrote:
> 
> On Wed, Feb 21, 2018 at 11:34:07AM +0000, Marc Zyngier wrote:
> > On Thu, 15 Feb 2018 21:02:55 +0000,
> > Christoffer Dall wrote:
> > > 
> > > We already have the percpu area for the host cpu state, which points to
> > > the VCPU, so there's no need to store the VCPU pointer on the stack on
> > > every context switch.  We can be a little more clever and just use
> > > tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> > > context.
> > > 
> > > This does require us to calculate the percpu offset without including
> > > the offset from the kernel mapping of the percpu array to the linear
> > > mapping of the array (which is what we store in tpidr_el1), because a
> > > PC-relative generated address in EL2 is already giving us the hyp alias
> > > of the linear mapping of a kernel address.  We do this in
> > > __cpu_init_hyp_mode() by using kvm_ksym_ref().
> > > 
> > > This change also requires us to have a scratch register, so we take the
> > > chance to rearrange some of the el1_sync code to only look at the
> > > vttbr_el2 to determine if this is a trap from the guest or an HVC from
> > > the host.  We do add an extra check to call the panic code if the kernel
> > > is configured with debugging enabled and we saw a trap from the host
> > > which wasn't an HVC, indicating that we left some EL2 trap configured by
> > > mistake.
> > > 
> > > The code that accesses ESR_EL2 was previously using an alternative to
> > > use the _EL1 accessor on VHE systems, but this was actually unnecessary
> > > as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> > > accessor does the same thing on both systems.
> > > 
> > > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > > ---
> > > 
> > > Notes:
> > >     Changes since v3:
> > >      - Reworked the assembly part of the patch after rebasing on v4.16-rc1
> > >        which created a conflict with the variant 2 mitigations.
> > >      - Removed Marc's reviewed-by due to the rework.
> > >      - Removed unneeded extern keyword in declaration in header file
> > >     
> > >     Changes since v1:
> > >      - Use PC-relative addressing to access per-cpu variables instead of
> > >        using a load from the literal pool.
> > >      - Remove stale comments as pointed out by Marc
> > >      - Reworded the commit message as suggested by Drew
> > > 
> > >  arch/arm64/include/asm/kvm_asm.h  | 14 ++++++++++++++
> > >  arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
> > >  arch/arm64/kernel/asm-offsets.c   |  1 +
> > >  arch/arm64/kvm/hyp/entry.S        |  6 +-----
> > >  arch/arm64/kvm/hyp/hyp-entry.S    | 31 +++++++++++++------------------
> > >  arch/arm64/kvm/hyp/switch.c       |  5 +----
> > >  arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
> > >  7 files changed, 50 insertions(+), 27 deletions(-)
> > > 
> > > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > > index 24961b732e65..6b626750b0a1 100644
> > > --- a/arch/arm64/include/asm/kvm_asm.h
> > > +++ b/arch/arm64/include/asm/kvm_asm.h
> > > @@ -33,6 +33,7 @@
> > >  #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
> > >  #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
> > >  
> > > +/* Translate a kernel address of @sym into its equivalent linear mapping */
> > >  #define kvm_ksym_ref(sym)						\
> > >  	({								\
> > >  		void *val = &sym;					\
> > > @@ -70,6 +71,19 @@ extern u32 __init_stage2_translation(void);
> > >  
> > >  extern void __qcom_hyp_sanitize_btac_predictors(void);
> > >  
> > > +#else /* __ASSEMBLY__ */
> > > +
> > > +.macro get_host_ctxt reg, tmp
> > > +	adr_l	\reg, kvm_host_cpu_state
> > > +	mrs	\tmp, tpidr_el2
> > > +	add	\reg, \reg, \tmp
> > > +.endm
> > > +
> > > +.macro get_vcpu vcpu, ctxt
> > > +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> > > +	kern_hyp_va	\vcpu
> > > +.endm
> > > +
> > >  #endif
> > >  
> > >  #endif /* __ARM_KVM_ASM_H__ */
> > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > > index 596f8e414a4c..618cfee7206a 100644
> > > --- a/arch/arm64/include/asm/kvm_host.h
> > > +++ b/arch/arm64/include/asm/kvm_host.h
> > > @@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
> > >  
> > >  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> > >  
> > > +void __kvm_set_tpidr_el2(u64 tpidr_el2);
> > > +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
> > > +
> > >  static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
> > >  				       unsigned long hyp_stack_ptr,
> > >  				       unsigned long vector_ptr)
> > >  {
> > > +	u64 tpidr_el2;
> > > +
> > >  	/*
> > >  	 * Call initialization code, and switch to the full blown HYP code.
> > >  	 * If the cpucaps haven't been finalized yet, something has gone very
> > > @@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
> > >  	 */
> > >  	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
> > >  	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
> > > +
> > > +	/*
> > > +	 * Calculate the raw per-cpu offset without a translation from the
> > > +	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
> > > +	 * so that we can use adr_l to access per-cpu variables in EL2.
> > > +	 */
> > > +	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
> > > +		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
> > > +
> > > +	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
> > >  }
> > >  
> > >  static inline void kvm_arch_hardware_unsetup(void) {}
> > > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> > > index 1303e04110cd..78e1b0a70aaf 100644
> > > --- a/arch/arm64/kernel/asm-offsets.c
> > > +++ b/arch/arm64/kernel/asm-offsets.c
> > > @@ -138,6 +138,7 @@ int main(void)
> > >    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
> > >    DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
> > >    DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> > > +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
> > >  #endif
> > >  #ifdef CONFIG_CPU_PM
> > >    DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > > index fdd1068ee3a5..1f458f7c3b44 100644
> > > --- a/arch/arm64/kvm/hyp/entry.S
> > > +++ b/arch/arm64/kvm/hyp/entry.S
> > > @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
> > >  	// Store the host regs
> > >  	save_callee_saved_regs x1
> > >  
> > > -	// Store host_ctxt and vcpu for use at exit time
> > > -	stp	x1, x0, [sp, #-16]!
> > > -
> > >  	add	x18, x0, #VCPU_CONTEXT
> > >  
> > >  	// Restore guest regs x0-x17
> > > @@ -118,8 +115,7 @@ ENTRY(__guest_exit)
> > >  	// Store the guest regs x19-x29, lr
> > >  	save_callee_saved_regs x1
> > >  
> > > -	// Restore the host_ctxt from the stack
> > > -	ldr	x2, [sp], #16
> > > +	get_host_ctxt	x2, x3
> > >  
> > >  	// Now restore the host regs
> > >  	restore_callee_saved_regs x2
> > > diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> > > index f36464bd57c5..559b4d54bc42 100644
> > > --- a/arch/arm64/kvm/hyp/hyp-entry.S
> > > +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> > > @@ -57,13 +57,8 @@ ENDPROC(__vhe_hyp_call)
> > >  el1_sync:				// Guest trapped into EL2
> > >  	stp	x0, x1, [sp, #-16]!
> > >  
> > > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > > -	mrs	x1, esr_el2
> > > -alternative_else
> > > -	mrs	x1, esr_el1
> > > -alternative_endif
> > > -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> > > -
> > > +	mrs	x0, esr_el2
> > > +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
> > >  	cmp	x0, #ESR_ELx_EC_HVC64
> > >  	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
> > >  	b.ne	el1_trap
> > > @@ -117,10 +112,15 @@ el1_hvc_guest:
> > >  	eret
> > >  
> > >  el1_trap:
> > > +	get_host_ctxt	x0, x1
> > > +	get_vcpu	x1, x0
> > > +
> > > +	mrs		x0, esr_el2
> > > +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
> > >  	/*
> > >  	 * x0: ESR_EC
> > > +	 * x1: vcpu pointer
> > >  	 */
> > > -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
> > >  
> > >  	/*
> > >  	 * We trap the first access to the FP/SIMD to save the host context
> > > @@ -138,13 +138,15 @@ alternative_else_nop_endif
> > >  
> > >  el1_irq:
> > >  	stp     x0, x1, [sp, #-16]!
> > > -	ldr	x1, [sp, #16 + 8]
> > > +	get_host_ctxt	x0, x1
> > > +	get_vcpu	x1, x0
> > >  	mov	x0, #ARM_EXCEPTION_IRQ
> > >  	b	__guest_exit
> > >  
> > >  el1_error:
> > >  	stp     x0, x1, [sp, #-16]!
> > > -	ldr	x1, [sp, #16 + 8]
> > > +	get_host_ctxt	x0, x1
> > > +	get_vcpu	x1, x0
> > 
> > Given how frequent this construct is, would there be a benefit in
> > having something like "get_vcpu_ptr" that conflates the two macros? We
> > don't seem to have a single case of using get_vcpu on its own.
> > 
> 
> I think my intention was to make it obvious how we get to the vcpu
> pointer, but looking at it now I don't think this adds anything, so I'm
> happy to adjust.  How about adding a get_vcpu_ptr macro which calls the
> other two macros?

Sure, that would make sense.

> 
> > >  	mov	x0, #ARM_EXCEPTION_EL1_SERROR
> > >  	b	__guest_exit
> > >  
> > > @@ -180,14 +182,7 @@ ENTRY(__hyp_do_panic)
> > >  ENDPROC(__hyp_do_panic)
> > >  
> > >  ENTRY(__hyp_panic)
> > > -	/*
> > > -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> > > -	 * not be accessible by this address from EL2, hyp_panic() converts
> > > -	 * it with kern_hyp_va() before use.
> > > -	 */
> > > -	ldr	x0, =kvm_host_cpu_state
> > > -	mrs	x1, tpidr_el2
> > > -	add	x0, x0, x1
> > > +	get_host_ctxt x0, x1
> > >  	b	hyp_panic
> > >  ENDPROC(__hyp_panic)
> > >  
> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > > index b991f85c1133..d1749fa0bfc3 100644
> > > --- a/arch/arm64/kvm/hyp/switch.c
> > > +++ b/arch/arm64/kvm/hyp/switch.c
> > > @@ -467,7 +467,7 @@ static hyp_alternate_select(__hyp_call_panic,
> > >  			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
> > >  			    ARM64_HAS_VIRT_HOST_EXTN);
> > >  
> > > -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> > > +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
> > >  {
> > >  	struct kvm_vcpu *vcpu = NULL;
> > >  
> > > @@ -476,9 +476,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> > >  	u64 par = read_sysreg(par_el1);
> > >  
> > >  	if (read_sysreg(vttbr_el2)) {
> > > -		struct kvm_cpu_context *host_ctxt;
> > > -
> > > -		host_ctxt = kern_hyp_va(__host_ctxt);
> > >  		vcpu = host_ctxt->__hyp_running_vcpu;
> > >  		__timer_disable_traps(vcpu);
> > >  		__deactivate_traps(vcpu);
> > > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> > > index 2c17afd2be96..43b7dd65e3e6 100644
> > > --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> > > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> > > @@ -189,3 +189,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
> > >  	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
> > >  		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
> > >  }
> > > +
> > > +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
> > > +{
> > > +	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
> > 
> > The paranoid in me says that we'd want an ISB here if we can possibly
> > use tpidr_el2 on this path. If we had to panic, for example...
> > 
> 
> I'm not sure I follow.  Are you not guaranteed that an mrs after msr
> would reflect the latest written value, even without an ISB?

You're absolutely right. /me dropping paranoia levels.

Thanks,

	M.
Andrew Jones Feb. 22, 2018, 9:49 a.m. UTC | #8
On Thu, Feb 22, 2018 at 10:02:48AM +0100, Christoffer Dall wrote:
> On Wed, Feb 21, 2018 at 11:34:07AM +0000, Marc Zyngier wrote:
> > On Thu, 15 Feb 2018 21:02:55 +0000,
> > Christoffer Dall wrote:
> > > @@ -138,13 +138,15 @@ alternative_else_nop_endif
> > >  
> > >  el1_irq:
> > >  	stp     x0, x1, [sp, #-16]!
> > > -	ldr	x1, [sp, #16 + 8]
> > > +	get_host_ctxt	x0, x1
> > > +	get_vcpu	x1, x0
> > >  	mov	x0, #ARM_EXCEPTION_IRQ
> > >  	b	__guest_exit
> > >  
> > >  el1_error:
> > >  	stp     x0, x1, [sp, #-16]!
> > > -	ldr	x1, [sp, #16 + 8]
> > > +	get_host_ctxt	x0, x1
> > > +	get_vcpu	x1, x0
> > 
> > Given how frequent this construct is, would there be a benefit in
> > having something like "get_vcpu_ptr" that conflates the two macros? We
> > don't seem to have a single case of using get_vcpu on its own.
> > 
> 
> I think my intention was to make it obvious how we get to the vcpu
> pointer, but looking at it now I don't think this adds anything, so I'm
> happy to adjust.  How about adding a get_vcpu_ptr macro which calls the
> other two macros?
>

Do we really need three macros, if get_vcpu is never used independently?
In the first round of reviews I suggested redefining get_vcpu like this

"""
 .macro get_vcpu vcpu, tmp
     get_host_ctxt \tmp, \vcpu
     ldr     \vcpu, [\tmp, #HOST_CONTEXT_VCPU]
     kern_hyp_va     \vcpu
 .endm

 which also has the side-effect of tmp being ctxt after the call.
"""

Thanks,
drew
Andrew Jones Feb. 22, 2018, 9:56 a.m. UTC | #9
On Thu, Feb 22, 2018 at 10:10:34AM +0100, Christoffer Dall wrote:
> On Wed, Feb 21, 2018 at 06:32:00PM +0100, Andrew Jones wrote:
> > 
> > Besides my confusion on motivation, it looks good to me
> > 
> 
> In that case, unless there's an argument that the code has become too
> hard to understand, ...
>

On the contrary, I think it's easier to read now than before. I just
wasn't clear how it all tied together with this series. If its
combination with other patches enables a speedup, then it certainly
fits here. I was just comparing mrs+loads vs. a load from stack and
it seemed it would actually cause a micro-performance-decrease.

Thanks,
drew
Christoffer Dall Feb. 22, 2018, 10:24 a.m. UTC | #10
On Thu, Feb 22, 2018 at 10:56:41AM +0100, Andrew Jones wrote:
> On Thu, Feb 22, 2018 at 10:10:34AM +0100, Christoffer Dall wrote:
> > On Wed, Feb 21, 2018 at 06:32:00PM +0100, Andrew Jones wrote:
> > > 
> > > Besides my confusion on motivation, it looks good to me
> > > 
> > 
> > In that case, unless there's an argument that the code has become too
> > hard to understand, ...
> >
> 
> On the contrary, I think it's easier to read now than before. I just
> wasn't clear how it all tied together with this series. If its
> combination with other patches enables a speedup, then it certainly
> fits here. I was just comparing mrs+loads vs. a load from stack and

The comparison should be mrs+load vs. str+load (you save the store on
the stack).
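
Roughly, on the exit path (e.g. el1_irq) the two variants expand to
something like this (simplified from the hunks above; the old variant
also relies on the stp in __guest_enter having stashed the pointer):

	// old: load the vcpu pointer spilled to the stack by __guest_enter
	ldr	x1, [sp, #16 + 8]

	// new: rebuild the pointer via tpidr_el2 (get_host_ctxt + get_vcpu)
	adr_l	x0, kvm_host_cpu_state
	mrs	x1, tpidr_el2
	add	x0, x0, x1
	ldr	x1, [x0, #HOST_CONTEXT_VCPU]
	kern_hyp_va	x1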

> it seemed it would actually cause a micro-performance-decrease.
> 

Thanks,
-Christoffer

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..6b626750b0a1 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@ 
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+/* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)						\
 	({								\
 		void *val = &sym;					\
@@ -70,6 +71,19 @@  extern u32 __init_stage2_translation(void);
 
 extern void __qcom_hyp_sanitize_btac_predictors(void);
 
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+	adr_l	\reg, kvm_host_cpu_state
+	mrs	\tmp, tpidr_el2
+	add	\reg, \reg, \tmp
+.endm
+
+.macro get_vcpu vcpu, ctxt
+	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+	kern_hyp_va	\vcpu
+.endm
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..618cfee7206a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -358,10 +358,15 @@  int kvm_perf_teardown(void);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
+	u64 tpidr_el2;
+
 	/*
 	 * Call initialization code, and switch to the full blown HYP code.
 	 * If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +375,16 @@  static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 */
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+	/*
+	 * Calculate the raw per-cpu offset without a translation from the
+	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
+	 * so that we can use adr_l to access per-cpu variables in EL2.
+	 */
+	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
 }
 
 static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04110cd..78e1b0a70aaf 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -138,6 +138,7 @@  int main(void)
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
   DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068ee3a5..1f458f7c3b44 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@  ENTRY(__guest_enter)
 	// Store the host regs
 	save_callee_saved_regs x1
 
-	// Store host_ctxt and vcpu for use at exit time
-	stp	x1, x0, [sp, #-16]!
-
 	add	x18, x0, #VCPU_CONTEXT
 
 	// Restore guest regs x0-x17
@@ -118,8 +115,7 @@  ENTRY(__guest_exit)
 	// Store the guest regs x19-x29, lr
 	save_callee_saved_regs x1
 
-	// Restore the host_ctxt from the stack
-	ldr	x2, [sp], #16
+	get_host_ctxt	x2, x3
 
 	// Now restore the host regs
 	restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f36464bd57c5..559b4d54bc42 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -57,13 +57,8 @@  ENDPROC(__vhe_hyp_call)
 el1_sync:				// Guest trapped into EL2
 	stp	x0, x1, [sp, #-16]!
 
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x1, esr_el2
-alternative_else
-	mrs	x1, esr_el1
-alternative_endif
-	lsr	x0, x1, #ESR_ELx_EC_SHIFT
-
+	mrs	x0, esr_el2
+	lsr	x0, x0, #ESR_ELx_EC_SHIFT
 	cmp	x0, #ESR_ELx_EC_HVC64
 	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
 	b.ne	el1_trap
@@ -117,10 +112,15 @@  el1_hvc_guest:
 	eret
 
 el1_trap:
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
+
+	mrs		x0, esr_el2
+	lsr		x0, x0, #ESR_ELx_EC_SHIFT
 	/*
 	 * x0: ESR_EC
+	 * x1: vcpu pointer
 	 */
-	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
 
 	/*
 	 * We trap the first access to the FP/SIMD to save the host context
@@ -138,13 +138,15 @@  alternative_else_nop_endif
 
 el1_irq:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
 
 el1_error:
 	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_host_ctxt	x0, x1
+	get_vcpu	x1, x0
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
@@ -180,14 +182,7 @@  ENTRY(__hyp_do_panic)
 ENDPROC(__hyp_do_panic)
 
 ENTRY(__hyp_panic)
-	/*
-	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
-	 * not be accessible by this address from EL2, hyp_panic() converts
-	 * it with kern_hyp_va() before use.
-	 */
-	ldr	x0, =kvm_host_cpu_state
-	mrs	x1, tpidr_el2
-	add	x0, x0, x1
+	get_host_ctxt x0, x1
 	b	hyp_panic
 ENDPROC(__hyp_panic)
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index b991f85c1133..d1749fa0bfc3 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -467,7 +467,7 @@  static hyp_alternate_select(__hyp_call_panic,
 			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
 			    ARM64_HAS_VIRT_HOST_EXTN);
 
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
 	struct kvm_vcpu *vcpu = NULL;
 
@@ -476,9 +476,6 @@  void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
 	u64 par = read_sysreg(par_el1);
 
 	if (read_sysreg(vttbr_el2)) {
-		struct kvm_cpu_context *host_ctxt;
-
-		host_ctxt = kern_hyp_va(__host_ctxt);
 		vcpu = host_ctxt->__hyp_running_vcpu;
 		__timer_disable_traps(vcpu);
 		__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 2c17afd2be96..43b7dd65e3e6 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -189,3 +189,8 @@  void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
 		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
 }
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}