diff mbox series

[v4,07/36] KVM: arm64: nv: Save/Restore vEL2 sysregs

Message ID 20241009190019.3222687-8-maz@kernel.org (mailing list archive)
State New
Headers show
Series KVM: arm64: Add EL2 support to FEAT_S1PIE/S1POE | expand

Commit Message

Marc Zyngier Oct. 9, 2024, 6:59 p.m. UTC
Whenever we need to restore the guest's system registers to the CPU, we
now need to take care of the EL2 system registers as well. Most of them
are accessed via traps only, but some have an immediate effect and also
a guest running in VHE mode would expect them to be accessible via their
EL1 encoding, which we do not trap.

For vEL2 we write the virtual EL2 registers with an identical format directly
into their EL1 counterpart, and translate the few registers that have a
different format for the same effect on the execution when running a
non-VHE guest guest hypervisor.

Based on an initial patch from Andre Przywara, rewritten many times
since.

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |   5 +-
 arch/arm64/kvm/hyp/nvhe/sysreg-sr.c        |   2 +-
 arch/arm64/kvm/hyp/vhe/sysreg-sr.c         | 137 ++++++++++++++++++++-
 3 files changed, 139 insertions(+), 5 deletions(-)

Comments

Oliver Upton Oct. 9, 2024, 7:55 p.m. UTC | #1
On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote:
> +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
> +{
> +	u64 val;
> +
> +	/* These registers are common with EL1 */
> +	write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1),	par_el1);
> +	write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1),	tpidr_el1);
> +
> +	write_sysreg(read_cpuid_id(),				vpidr_el2);

I don't think we need to restore VPIDR_EL2 here, so long as we do it on
vcpu_put() when leaving a nested VM context. That seems like the right
place to have it, as we could be running a mix of nested and non-nested
VMs and don't ever poke VPIDR_EL2 for non-NV VMs.

> @@ -89,7 +192,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
>  	 */
>  	__sysreg32_restore_state(vcpu);
>  	__sysreg_restore_user_state(guest_ctxt);
> -	__sysreg_restore_el1_state(guest_ctxt);
> +
> +	if (unlikely(__is_hyp_ctxt(guest_ctxt))) {
> +		__sysreg_restore_vel2_state(vcpu);
> +	} else {
> +		if (vcpu_has_nv(vcpu)) {
> +			/*
> +			 * Only set VPIDR_EL2 for nested VMs, as this is the
> +			 * only time it changes. We'll restore the MIDR_EL1
> +			 * view on put.
> +			 */

Slightly ambiguous what "VPIDR_EL2" this is referring to (hardware reg
v. guest value). Maybe:

			/*
			 * Use the guest hypervisor's VPIDR_EL2 when in a nested
			 * state. The hardware value of MIDR_EL1 gets restored on
			 * put.
			 */

> +			write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
> +
> +			/*
> +			 * As we're restoring a nested guest, set the value
> +			 * provided by the guest hypervisor.
> +			 */
> +			mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
> +		} else {
> +			mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
> +		}
> +
> +		__sysreg_restore_el1_state(guest_ctxt, mpidr);
> +	}
>  
>  	vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
>  }
> @@ -112,12 +237,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
>  
>  	host_ctxt = host_data_ptr(host_ctxt);
>  
> -	__sysreg_save_el1_state(guest_ctxt);
> +	if (unlikely(__is_hyp_ctxt(guest_ctxt)))
> +		__sysreg_save_vel2_state(vcpu);
> +	else
> +		__sysreg_save_el1_state(guest_ctxt);
> +
>  	__sysreg_save_user_state(guest_ctxt);
>  	__sysreg32_save_state(vcpu);
>  
>  	/* Restore host user state */
>  	__sysreg_restore_user_state(host_ctxt);
>  
> +	/* If leaving a nesting guest, restore MPIDR_EL1 default view */

typo: MIDR_EL1

> +	if (vcpu_has_nv(vcpu))
> +		write_sysreg(read_cpuid_id(),	vpidr_el2);
> +
>  	vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
>  }
> -- 
> 2.39.2
>
Alexandru Elisei Oct. 16, 2024, 1:12 p.m. UTC | #2
Hi Marc,

On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote:
> Whenever we need to restore the guest's system registers to the CPU, we
> now need to take care of the EL2 system registers as well. Most of them
> are accessed via traps only, but some have an immediate effect and also
> a guest running in VHE mode would expect them to be accessible via their
> EL1 encoding, which we do not trap.
> 
> For vEL2 we write the virtual EL2 registers with an identical format directly
> into their EL1 counterpart, and translate the few registers that have a
> different format for the same effect on the execution when running a
> non-VHE guest guest hypervisor.
> 
> Based on an initial patch from Andre Przywara, rewritten many times
> since.
> 
> Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |   5 +-
>  arch/arm64/kvm/hyp/nvhe/sysreg-sr.c        |   2 +-
>  arch/arm64/kvm/hyp/vhe/sysreg-sr.c         | 137 ++++++++++++++++++++-
>  3 files changed, 139 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> index 1579a3c08a36b..d67628d01bf5e 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
>  	write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0),	tpidrro_el0);
>  }
>  
> -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
> +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
> +					      u64 mpidr)
>  {
> -	write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1),	vmpidr_el2);
> +	write_sysreg(mpidr,				vmpidr_el2);
>  
>  	if (has_vhe() ||
>  	    !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
> diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> index 29305022bc048..dba101565de36 100644
> --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
>  
>  void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
>  {
> -	__sysreg_restore_el1_state(ctxt);
> +	__sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
>  	__sysreg_restore_common_state(ctxt);
>  	__sysreg_restore_user_state(ctxt);
>  	__sysreg_restore_el2_return_state(ctxt);
> diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> index e12bd7d6d2dce..e0df14ead2657 100644
> --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> @@ -15,6 +15,108 @@
>  #include <asm/kvm_hyp.h>
>  #include <asm/kvm_nested.h>
>  
> +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
> +{
> +	/* These registers are common with EL1 */
> +	__vcpu_sys_reg(vcpu, PAR_EL1)	= read_sysreg(par_el1);
> +	__vcpu_sys_reg(vcpu, TPIDR_EL1)	= read_sysreg(tpidr_el1);
> +
> +	__vcpu_sys_reg(vcpu, ESR_EL2)	= read_sysreg_el1(SYS_ESR);
> +	__vcpu_sys_reg(vcpu, AFSR0_EL2)	= read_sysreg_el1(SYS_AFSR0);
> +	__vcpu_sys_reg(vcpu, AFSR1_EL2)	= read_sysreg_el1(SYS_AFSR1);
> +	__vcpu_sys_reg(vcpu, FAR_EL2)	= read_sysreg_el1(SYS_FAR);
> +	__vcpu_sys_reg(vcpu, MAIR_EL2)	= read_sysreg_el1(SYS_MAIR);
> +	__vcpu_sys_reg(vcpu, VBAR_EL2)	= read_sysreg_el1(SYS_VBAR);
> +	__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
> +	__vcpu_sys_reg(vcpu, AMAIR_EL2)	= read_sysreg_el1(SYS_AMAIR);
> +
> +	/*
> +	 * In VHE mode those registers are compatible between EL1 and EL2,
> +	 * and the guest uses the _EL1 versions on the CPU naturally.
> +	 * So we save them into their _EL2 versions here.
> +	 * For nVHE mode we trap accesses to those registers, so our
> +	 * _EL2 copy in sys_regs[] is always up-to-date and we don't need
> +	 * to save anything here.
> +	 */
> +	if (vcpu_el2_e2h_is_set(vcpu)) {
> +		u64 val;
> +
> +		/*
> +		 * We don't save CPTR_EL2, as accesses to CPACR_EL1
> +		 * are always trapped, ensuring that the in-memory
> +		 * copy is always up-to-date. A small blessing...
> +		 */
> +		__vcpu_sys_reg(vcpu, SCTLR_EL2)	= read_sysreg_el1(SYS_SCTLR);
> +		__vcpu_sys_reg(vcpu, TTBR0_EL2)	= read_sysreg_el1(SYS_TTBR0);
> +		__vcpu_sys_reg(vcpu, TTBR1_EL2)	= read_sysreg_el1(SYS_TTBR1);
> +		__vcpu_sys_reg(vcpu, TCR_EL2)	= read_sysreg_el1(SYS_TCR);
> +
> +		/*
> +		 * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
> +		 * the interesting CNTHCTL_EL2 bits live. So preserve these
> +		 * bits when reading back the guest-visible value.
> +		 */
> +		val = read_sysreg_el1(SYS_CNTKCTL);
> +		val &= CNTKCTL_VALID_BITS;
> +		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
> +		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
> +	}
> +
> +	__vcpu_sys_reg(vcpu, SP_EL2)	= read_sysreg(sp_el1);
> +	__vcpu_sys_reg(vcpu, ELR_EL2)	= read_sysreg_el1(SYS_ELR);
> +	__vcpu_sys_reg(vcpu, SPSR_EL2)	= read_sysreg_el1(SYS_SPSR);
> +}
> +
> +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
> +{
> +	u64 val;
> +
> +	/* These registers are common with EL1 */
> +	write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1),	par_el1);
> +	write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1),	tpidr_el1);
> +
> +	write_sysreg(read_cpuid_id(),				vpidr_el2);
> +	write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1),		vmpidr_el2);
> +	write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2),	SYS_MAIR);
> +	write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2),	SYS_VBAR);
> +	write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2),	SYS_CONTEXTIDR);
> +	write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2),	SYS_AMAIR);
> +
> +	if (vcpu_el2_e2h_is_set(vcpu)) {
> +		/*
> +		 * In VHE mode those registers are compatible between
> +		 * EL1 and EL2.
> +		 */
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2),   SYS_SCTLR);
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2),    SYS_CPACR);
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2),   SYS_TTBR0);
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2),   SYS_TTBR1);
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2),	    SYS_TCR);
> +		write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
> +	} else {
> +		/*
> +		 * CNTHCTL_EL2 only affects EL1 when running nVHE, so
> +		 * no need to restore it.
> +		 */

I'm having such a hard time parsing the comment - might be just me coming back to
this code after such a long time.

If CNTHCTL_EL2 only affects EL1 when running nVHE, and the else branch deals
with the nVHE case, why isn't CNTHCTL_EL2 restored?

As for the 'only' part of the comment: when E2H=1, bits 10 and 11, EL1PCTEN and
EL1PTEN (why isn't this named EL1PCEN if it does the same thing as bit 1 when
E2H=0?), trap EL1 and EL0 accesses to physical counter and timer registers.

Or 'only' in this context means only EL1, and not EL2 also?

Thanks,
Alex

> +		val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2));
> +		write_sysreg_el1(val, SYS_SCTLR);
> +		val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2));
> +		write_sysreg_el1(val, SYS_CPACR);
> +		val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2));
> +		write_sysreg_el1(val, SYS_TTBR0);
> +		val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2));
> +		write_sysreg_el1(val, SYS_TCR);
> +	}
Marc Zyngier Oct. 16, 2024, 1:57 p.m. UTC | #3
On Wed, 16 Oct 2024 14:12:49 +0100,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> Hi Marc,
> 
> On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote:
> > Whenever we need to restore the guest's system registers to the CPU, we
> > now need to take care of the EL2 system registers as well. Most of them
> > are accessed via traps only, but some have an immediate effect and also
> > a guest running in VHE mode would expect them to be accessible via their
> > EL1 encoding, which we do not trap.
> > 
> > For vEL2 we write the virtual EL2 registers with an identical format directly
> > into their EL1 counterpart, and translate the few registers that have a
> > different format for the same effect on the execution when running a
> > non-VHE guest guest hypervisor.
> > 
> > Based on an initial patch from Andre Przywara, rewritten many times
> > since.
> > 
> > Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> >  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |   5 +-
> >  arch/arm64/kvm/hyp/nvhe/sysreg-sr.c        |   2 +-
> >  arch/arm64/kvm/hyp/vhe/sysreg-sr.c         | 137 ++++++++++++++++++++-
> >  3 files changed, 139 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > index 1579a3c08a36b..d67628d01bf5e 100644
> > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
> >  	write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0),	tpidrro_el0);
> >  }
> >  
> > -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
> > +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
> > +					      u64 mpidr)
> >  {
> > -	write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1),	vmpidr_el2);
> > +	write_sysreg(mpidr,				vmpidr_el2);
> >  
> >  	if (has_vhe() ||
> >  	    !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
> > diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> > index 29305022bc048..dba101565de36 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
> > @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
> >  
> >  void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
> >  {
> > -	__sysreg_restore_el1_state(ctxt);
> > +	__sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
> >  	__sysreg_restore_common_state(ctxt);
> >  	__sysreg_restore_user_state(ctxt);
> >  	__sysreg_restore_el2_return_state(ctxt);
> > diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> > index e12bd7d6d2dce..e0df14ead2657 100644
> > --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> > +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> > @@ -15,6 +15,108 @@
> >  #include <asm/kvm_hyp.h>
> >  #include <asm/kvm_nested.h>
> >  
> > +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
> > +{
> > +	/* These registers are common with EL1 */
> > +	__vcpu_sys_reg(vcpu, PAR_EL1)	= read_sysreg(par_el1);
> > +	__vcpu_sys_reg(vcpu, TPIDR_EL1)	= read_sysreg(tpidr_el1);
> > +
> > +	__vcpu_sys_reg(vcpu, ESR_EL2)	= read_sysreg_el1(SYS_ESR);
> > +	__vcpu_sys_reg(vcpu, AFSR0_EL2)	= read_sysreg_el1(SYS_AFSR0);
> > +	__vcpu_sys_reg(vcpu, AFSR1_EL2)	= read_sysreg_el1(SYS_AFSR1);
> > +	__vcpu_sys_reg(vcpu, FAR_EL2)	= read_sysreg_el1(SYS_FAR);
> > +	__vcpu_sys_reg(vcpu, MAIR_EL2)	= read_sysreg_el1(SYS_MAIR);
> > +	__vcpu_sys_reg(vcpu, VBAR_EL2)	= read_sysreg_el1(SYS_VBAR);
> > +	__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
> > +	__vcpu_sys_reg(vcpu, AMAIR_EL2)	= read_sysreg_el1(SYS_AMAIR);
> > +
> > +	/*
> > +	 * In VHE mode those registers are compatible between EL1 and EL2,
> > +	 * and the guest uses the _EL1 versions on the CPU naturally.
> > +	 * So we save them into their _EL2 versions here.
> > +	 * For nVHE mode we trap accesses to those registers, so our
> > +	 * _EL2 copy in sys_regs[] is always up-to-date and we don't need
> > +	 * to save anything here.
> > +	 */
> > +	if (vcpu_el2_e2h_is_set(vcpu)) {
> > +		u64 val;
> > +
> > +		/*
> > +		 * We don't save CPTR_EL2, as accesses to CPACR_EL1
> > +		 * are always trapped, ensuring that the in-memory
> > +		 * copy is always up-to-date. A small blessing...
> > +		 */
> > +		__vcpu_sys_reg(vcpu, SCTLR_EL2)	= read_sysreg_el1(SYS_SCTLR);
> > +		__vcpu_sys_reg(vcpu, TTBR0_EL2)	= read_sysreg_el1(SYS_TTBR0);
> > +		__vcpu_sys_reg(vcpu, TTBR1_EL2)	= read_sysreg_el1(SYS_TTBR1);
> > +		__vcpu_sys_reg(vcpu, TCR_EL2)	= read_sysreg_el1(SYS_TCR);
> > +
> > +		/*
> > +		 * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
> > +		 * the interesting CNTHCTL_EL2 bits live. So preserve these
> > +		 * bits when reading back the guest-visible value.
> > +		 */
> > +		val = read_sysreg_el1(SYS_CNTKCTL);
> > +		val &= CNTKCTL_VALID_BITS;
> > +		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
> > +		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
> > +	}
> > +
> > +	__vcpu_sys_reg(vcpu, SP_EL2)	= read_sysreg(sp_el1);
> > +	__vcpu_sys_reg(vcpu, ELR_EL2)	= read_sysreg_el1(SYS_ELR);
> > +	__vcpu_sys_reg(vcpu, SPSR_EL2)	= read_sysreg_el1(SYS_SPSR);
> > +}
> > +
> > +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
> > +{
> > +	u64 val;
> > +
> > +	/* These registers are common with EL1 */
> > +	write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1),	par_el1);
> > +	write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1),	tpidr_el1);
> > +
> > +	write_sysreg(read_cpuid_id(),				vpidr_el2);
> > +	write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1),		vmpidr_el2);
> > +	write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2),	SYS_MAIR);
> > +	write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2),	SYS_VBAR);
> > +	write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2),	SYS_CONTEXTIDR);
> > +	write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2),	SYS_AMAIR);
> > +
> > +	if (vcpu_el2_e2h_is_set(vcpu)) {
> > +		/*
> > +		 * In VHE mode those registers are compatible between
> > +		 * EL1 and EL2.
> > +		 */
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2),   SYS_SCTLR);
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2),    SYS_CPACR);
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2),   SYS_TTBR0);
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2),   SYS_TTBR1);
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2),	    SYS_TCR);
> > +		write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
> > +	} else {
> > +		/*
> > +		 * CNTHCTL_EL2 only affects EL1 when running nVHE, so
> > +		 * no need to restore it.
> > +		 */
> 
> I'm having such a hard time parsing the comment - might be just me coming back to
> this code after such a long time.
> 
> If CNTHCTL_EL2 only affects EL1 when running nVHE, and the else branch deals
> with the nVHE case, why isn't CNTHCTL_EL2 restored?

Because it has no impact at all? As in nothing? Niente? Rien? Zilch?
We enter the guest's EL2, so why would we bother with restoring a
guest register that has no influence on what we run?

> 
> As for the 'only' part of the comment: when E2H=1, bits 10 and 11, EL1PCTEN and
> EL1PTEN (why isn't this named EL1PCEN if it does the same thing as bit 1 when
> E2H=0?), trap EL1 and EL0 accesses to physical counter and timer registers.
> 
> Or 'only' in this context means only EL1, and not EL2 also?

None of this makes any sense to me. I don't understand your E2H
consideration, nor your digression on the meaning of the word 'only'.

Look at the architecture. Do you see *ANY* bit in CNTHCTL_EL2 having
*ANY* influence on EL2 when HCR_EL2.E2H=0? Don't you then come to the
conclusion that CNTHCTL_EL2 only affects EL1?

But surely you've spotted something I can't see, and I must be
specially thick today... Please enlighten me.

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 1579a3c08a36b..d67628d01bf5e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -152,9 +152,10 @@  static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0),	tpidrro_el0);
 }
 
-static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
+					      u64 mpidr)
 {
-	write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1),	vmpidr_el2);
+	write_sysreg(mpidr,				vmpidr_el2);
 
 	if (has_vhe() ||
 	    !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index 29305022bc048..dba101565de36 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -28,7 +28,7 @@  void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
 
 void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
 {
-	__sysreg_restore_el1_state(ctxt);
+	__sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
 	__sysreg_restore_common_state(ctxt);
 	__sysreg_restore_user_state(ctxt);
 	__sysreg_restore_el2_return_state(ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index e12bd7d6d2dce..e0df14ead2657 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -15,6 +15,108 @@ 
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_nested.h>
 
+static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
+{
+	/* These registers are common with EL1 */
+	__vcpu_sys_reg(vcpu, PAR_EL1)	= read_sysreg(par_el1);
+	__vcpu_sys_reg(vcpu, TPIDR_EL1)	= read_sysreg(tpidr_el1);
+
+	__vcpu_sys_reg(vcpu, ESR_EL2)	= read_sysreg_el1(SYS_ESR);
+	__vcpu_sys_reg(vcpu, AFSR0_EL2)	= read_sysreg_el1(SYS_AFSR0);
+	__vcpu_sys_reg(vcpu, AFSR1_EL2)	= read_sysreg_el1(SYS_AFSR1);
+	__vcpu_sys_reg(vcpu, FAR_EL2)	= read_sysreg_el1(SYS_FAR);
+	__vcpu_sys_reg(vcpu, MAIR_EL2)	= read_sysreg_el1(SYS_MAIR);
+	__vcpu_sys_reg(vcpu, VBAR_EL2)	= read_sysreg_el1(SYS_VBAR);
+	__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
+	__vcpu_sys_reg(vcpu, AMAIR_EL2)	= read_sysreg_el1(SYS_AMAIR);
+
+	/*
+	 * In VHE mode those registers are compatible between EL1 and EL2,
+	 * and the guest uses the _EL1 versions on the CPU naturally.
+	 * So we save them into their _EL2 versions here.
+	 * For nVHE mode we trap accesses to those registers, so our
+	 * _EL2 copy in sys_regs[] is always up-to-date and we don't need
+	 * to save anything here.
+	 */
+	if (vcpu_el2_e2h_is_set(vcpu)) {
+		u64 val;
+
+		/*
+		 * We don't save CPTR_EL2, as accesses to CPACR_EL1
+		 * are always trapped, ensuring that the in-memory
+		 * copy is always up-to-date. A small blessing...
+		 */
+		__vcpu_sys_reg(vcpu, SCTLR_EL2)	= read_sysreg_el1(SYS_SCTLR);
+		__vcpu_sys_reg(vcpu, TTBR0_EL2)	= read_sysreg_el1(SYS_TTBR0);
+		__vcpu_sys_reg(vcpu, TTBR1_EL2)	= read_sysreg_el1(SYS_TTBR1);
+		__vcpu_sys_reg(vcpu, TCR_EL2)	= read_sysreg_el1(SYS_TCR);
+
+		/*
+		 * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
+		 * the interesting CNTHCTL_EL2 bits live. So preserve these
+		 * bits when reading back the guest-visible value.
+		 */
+		val = read_sysreg_el1(SYS_CNTKCTL);
+		val &= CNTKCTL_VALID_BITS;
+		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
+		__vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+	}
+
+	__vcpu_sys_reg(vcpu, SP_EL2)	= read_sysreg(sp_el1);
+	__vcpu_sys_reg(vcpu, ELR_EL2)	= read_sysreg_el1(SYS_ELR);
+	__vcpu_sys_reg(vcpu, SPSR_EL2)	= read_sysreg_el1(SYS_SPSR);
+}
+
+static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	/* These registers are common with EL1 */
+	write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1),	par_el1);
+	write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1),	tpidr_el1);
+
+	write_sysreg(read_cpuid_id(),				vpidr_el2);
+	write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1),		vmpidr_el2);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2),	SYS_MAIR);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2),	SYS_VBAR);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2),	SYS_CONTEXTIDR);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2),	SYS_AMAIR);
+
+	if (vcpu_el2_e2h_is_set(vcpu)) {
+		/*
+		 * In VHE mode those registers are compatible between
+		 * EL1 and EL2.
+		 */
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2),   SYS_SCTLR);
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2),    SYS_CPACR);
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2),   SYS_TTBR0);
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2),   SYS_TTBR1);
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2),	    SYS_TCR);
+		write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
+	} else {
+		/*
+		 * CNTHCTL_EL2 only affects EL1 when running nVHE, so
+		 * no need to restore it.
+		 */
+		val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2));
+		write_sysreg_el1(val, SYS_SCTLR);
+		val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2));
+		write_sysreg_el1(val, SYS_CPACR);
+		val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2));
+		write_sysreg_el1(val, SYS_TTBR0);
+		val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2));
+		write_sysreg_el1(val, SYS_TCR);
+	}
+
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2),		SYS_ESR);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2),	SYS_AFSR0);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2),	SYS_AFSR1);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2),		SYS_FAR);
+	write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2),		sp_el1);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2),		SYS_ELR);
+	write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2),	SYS_SPSR);
+}
+
 /*
  * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
  * pstate, which are handled as part of the el2 return state) on every
@@ -66,6 +168,7 @@  void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
 	struct kvm_cpu_context *host_ctxt;
+	u64 mpidr;
 
 	host_ctxt = host_data_ptr(host_ctxt);
 	__sysreg_save_user_state(host_ctxt);
@@ -89,7 +192,29 @@  void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
 	 */
 	__sysreg32_restore_state(vcpu);
 	__sysreg_restore_user_state(guest_ctxt);
-	__sysreg_restore_el1_state(guest_ctxt);
+
+	if (unlikely(__is_hyp_ctxt(guest_ctxt))) {
+		__sysreg_restore_vel2_state(vcpu);
+	} else {
+		if (vcpu_has_nv(vcpu)) {
+			/*
+			 * Only set VPIDR_EL2 for nested VMs, as this is the
+			 * only time it changes. We'll restore the MIDR_EL1
+			 * view on put.
+			 */
+			write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
+
+			/*
+			 * As we're restoring a nested guest, set the value
+			 * provided by the guest hypervisor.
+			 */
+			mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
+		} else {
+			mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
+		}
+
+		__sysreg_restore_el1_state(guest_ctxt, mpidr);
+	}
 
 	vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
 }
@@ -112,12 +237,20 @@  void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
 
 	host_ctxt = host_data_ptr(host_ctxt);
 
-	__sysreg_save_el1_state(guest_ctxt);
+	if (unlikely(__is_hyp_ctxt(guest_ctxt)))
+		__sysreg_save_vel2_state(vcpu);
+	else
+		__sysreg_save_el1_state(guest_ctxt);
+
 	__sysreg_save_user_state(guest_ctxt);
 	__sysreg32_save_state(vcpu);
 
 	/* Restore host user state */
 	__sysreg_restore_user_state(host_ctxt);
 
+	/* If leaving a nesting guest, restore MPIDR_EL1 default view */
+	if (vcpu_has_nv(vcpu))
+		write_sysreg(read_cpuid_id(),	vpidr_el2);
+
 	vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
 }