Message ID | 20241009190019.3222687-8-maz@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | KVM: arm64: Add EL2 support to FEAT_S1PIE/S1POE | expand |
On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote: > +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) > +{ > + u64 val; > + > + /* These registers are common with EL1 */ > + write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1); > + write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1); > + > + write_sysreg(read_cpuid_id(), vpidr_el2); I don't think we need to restore VPIDR_EL2 here, so long as we do it on vcpu_put() when leaving a nested VM context. That seems like the right place to have it, as we could be running a mix of nested and non-nested VMs and don't ever poke VPIDR_EL2 for non-NV VMs. > @@ -89,7 +192,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) > */ > __sysreg32_restore_state(vcpu); > __sysreg_restore_user_state(guest_ctxt); > - __sysreg_restore_el1_state(guest_ctxt); > + > + if (unlikely(__is_hyp_ctxt(guest_ctxt))) { > + __sysreg_restore_vel2_state(vcpu); > + } else { > + if (vcpu_has_nv(vcpu)) { > + /* > + * Only set VPIDR_EL2 for nested VMs, as this is the > + * only time it changes. We'll restore the MIDR_EL1 > + * view on put. > + */ Slightly ambiguous what "VPIDR_EL2" this is referring to (hardware reg v. guest value). Maybe: /* * Use the guest hypervisor's VPIDR_EL2 when in a nested * state. The hardware value of MIDR_EL1 gets restored on * put. */ > + write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2); > + > + /* > + * As we're restoring a nested guest, set the value > + * provided by the guest hypervisor. 
> + */ > + mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2); > + } else { > + mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1); > + } > + > + __sysreg_restore_el1_state(guest_ctxt, mpidr); > + } > > vcpu_set_flag(vcpu, SYSREGS_ON_CPU); > } > @@ -112,12 +237,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) > > host_ctxt = host_data_ptr(host_ctxt); > > - __sysreg_save_el1_state(guest_ctxt); > + if (unlikely(__is_hyp_ctxt(guest_ctxt))) > + __sysreg_save_vel2_state(vcpu); > + else > + __sysreg_save_el1_state(guest_ctxt); > + > __sysreg_save_user_state(guest_ctxt); > __sysreg32_save_state(vcpu); > > /* Restore host user state */ > __sysreg_restore_user_state(host_ctxt); > > + /* If leaving a nesting guest, restore MPIDR_EL1 default view */ typo: MIDR_EL1 > + if (vcpu_has_nv(vcpu)) > + write_sysreg(read_cpuid_id(), vpidr_el2); > + > vcpu_clear_flag(vcpu, SYSREGS_ON_CPU); > } > -- > 2.39.2 >
Hi Marc, On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote: > Whenever we need to restore the guest's system registers to the CPU, we > now need to take care of the EL2 system registers as well. Most of them > are accessed via traps only, but some have an immediate effect and also > a guest running in VHE mode would expect them to be accessible via their > EL1 encoding, which we do not trap. > > For vEL2 we write the virtual EL2 registers with an identical format directly > into their EL1 counterpart, and translate the few registers that have a > different format for the same effect on the execution when running a > non-VHE guest hypervisor. > > Based on an initial patch from Andre Przywara, rewritten many times > since. > > Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> > Signed-off-by: Marc Zyngier <maz@kernel.org> > --- > arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 5 +- > arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2 +- > arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 137 ++++++++++++++++++++- > 3 files changed, 139 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > index 1579a3c08a36b..d67628d01bf5e 100644 > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) > write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); > } > > -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) > +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, > + u64 mpidr) > { > - write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); > + write_sysreg(mpidr, vmpidr_el2); > > if (has_vhe() || > !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { > diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > index 29305022bc048..dba101565de36 100644 > --- 
a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) > > void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) > { > - __sysreg_restore_el1_state(ctxt); > + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); > __sysreg_restore_common_state(ctxt); > __sysreg_restore_user_state(ctxt); > __sysreg_restore_el2_return_state(ctxt); > diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > index e12bd7d6d2dce..e0df14ead2657 100644 > --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > @@ -15,6 +15,108 @@ > #include <asm/kvm_hyp.h> > #include <asm/kvm_nested.h> > > +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) > +{ > + /* These registers are common with EL1 */ > + __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); > + __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); > + > + __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); > + __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); > + __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); > + __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); > + __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); > + __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); > + __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); > + __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); > + > + /* > + * In VHE mode those registers are compatible between EL1 and EL2, > + * and the guest uses the _EL1 versions on the CPU naturally. > + * So we save them into their _EL2 versions here. > + * For nVHE mode we trap accesses to those registers, so our > + * _EL2 copy in sys_regs[] is always up-to-date and we don't need > + * to save anything here. 
> + */ > + if (vcpu_el2_e2h_is_set(vcpu)) { > + u64 val; > + > + /* > + * We don't save CPTR_EL2, as accesses to CPACR_EL1 > + * are always trapped, ensuring that the in-memory > + * copy is always up-to-date. A small blessing... > + */ > + __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); > + __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); > + __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); > + __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); > + > + /* > + * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where > + * the interesting CNTHCTL_EL2 bits live. So preserve these > + * bits when reading back the guest-visible value. > + */ > + val = read_sysreg_el1(SYS_CNTKCTL); > + val &= CNTKCTL_VALID_BITS; > + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; > + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; > + } > + > + __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); > + __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); > + __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); > +} > + > +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) > +{ > + u64 val; > + > + /* These registers are common with EL1 */ > + write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1); > + write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1); > + > + write_sysreg(read_cpuid_id(), vpidr_el2); > + write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR); > + > + if (vcpu_el2_e2h_is_set(vcpu)) { > + /* > + * In VHE mode those registers are compatible between > + * EL1 and EL2. 
> + */ > + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR); > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL); > + } else { > + /* > + * CNTHCTL_EL2 only affects EL1 when running nVHE, so > + * no need to restore it. > + */ I'm having such a hard time parsing the comment - might be just me coming back to this code after such a long time. If CNTHCTL_EL2 only affects EL1 when running nVHE, and the else branch deals with the nVHE case, why isn't CNTHCTL_EL2 restored? As for the 'only' part of the comment: when E2H=1, bits 10 and 11, EL1PCTEN and EL1PTEN (why isn't this named EL1PCEN if it does the same thing as bit 1 when E2H=0?), trap EL1 and EL0 accesses to physical counter and timer registers. Or 'only' in this context means only EL1, and not EL2 also? Thanks, Alex > + val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2)); > + write_sysreg_el1(val, SYS_SCTLR); > + val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2)); > + write_sysreg_el1(val, SYS_CPACR); > + val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2)); > + write_sysreg_el1(val, SYS_TTBR0); > + val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2)); > + write_sysreg_el1(val, SYS_TCR); > + }
On Wed, 16 Oct 2024 14:12:49 +0100, Alexandru Elisei <alexandru.elisei@arm.com> wrote: > > Hi Marc, > > On Wed, Oct 09, 2024 at 07:59:50PM +0100, Marc Zyngier wrote: > > Whenever we need to restore the guest's system registers to the CPU, we > > now need to take care of the EL2 system registers as well. Most of them > > are accessed via traps only, but some have an immediate effect and also > > a guest running in VHE mode would expect them to be accessible via their > > EL1 encoding, which we do not trap. > > > > For vEL2 we write the virtual EL2 registers with an identical format directly > > into their EL1 counterpart, and translate the few registers that have a > > different format for the same effect on the execution when running a > > non-VHE guest hypervisor. > > > > Based on an initial patch from Andre Przywara, rewritten many times > > since. > > > > Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> > > Signed-off-by: Marc Zyngier <maz@kernel.org> > > --- > > arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 5 +- > > arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2 +- > > arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 137 ++++++++++++++++++++- > > 3 files changed, 139 insertions(+), 5 deletions(-) > > > > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > > index 1579a3c08a36b..d67628d01bf5e 100644 > > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > > @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) > > write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); > > } > > > > -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) > > +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, > > + u64 mpidr) > > { > > - write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); > > + write_sysreg(mpidr, vmpidr_el2); > > > > if (has_vhe() || > > 
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { > > diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > > index 29305022bc048..dba101565de36 100644 > > --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > > +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > > @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) > > > > void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) > > { > > - __sysreg_restore_el1_state(ctxt); > > + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); > > __sysreg_restore_common_state(ctxt); > > __sysreg_restore_user_state(ctxt); > > __sysreg_restore_el2_return_state(ctxt); > > diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > > index e12bd7d6d2dce..e0df14ead2657 100644 > > --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > > +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > > @@ -15,6 +15,108 @@ > > #include <asm/kvm_hyp.h> > > #include <asm/kvm_nested.h> > > > > +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) > > +{ > > + /* These registers are common with EL1 */ > > + __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); > > + __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); > > + > > + __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); > > + __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); > > + __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); > > + __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); > > + __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); > > + __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); > > + __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); > > + __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); > > + > > + /* > > + * In VHE mode those registers are compatible between EL1 and EL2, > > + * and the guest uses the _EL1 versions on the CPU naturally. > > + * So we save them into their _EL2 versions here. 
> > + * For nVHE mode we trap accesses to those registers, so our > > + * _EL2 copy in sys_regs[] is always up-to-date and we don't need > > + * to save anything here. > > + */ > > + if (vcpu_el2_e2h_is_set(vcpu)) { > > + u64 val; > > + > > + /* > > + * We don't save CPTR_EL2, as accesses to CPACR_EL1 > > + * are always trapped, ensuring that the in-memory > > + * copy is always up-to-date. A small blessing... > > + */ > > + __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); > > + __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); > > + __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); > > + __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); > > + > > + /* > > + * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where > > + * the interesting CNTHCTL_EL2 bits live. So preserve these > > + * bits when reading back the guest-visible value. > > + */ > > + val = read_sysreg_el1(SYS_CNTKCTL); > > + val &= CNTKCTL_VALID_BITS; > > + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; > > + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; > > + } > > + > > + __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); > > + __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); > > + __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); > > +} > > + > > +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) > > +{ > > + u64 val; > > + > > + /* These registers are common with EL1 */ > > + write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1); > > + write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1); > > + > > + write_sysreg(read_cpuid_id(), vpidr_el2); > > + write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR); > > + > > + if 
(vcpu_el2_e2h_is_set(vcpu)) { > > + /* > > + * In VHE mode those registers are compatible between > > + * EL1 and EL2. > > + */ > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR); > > + write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL); > > + } else { > > + /* > > + * CNTHCTL_EL2 only affects EL1 when running nVHE, so > > + * no need to restore it. > > + */ > > I'm having such a hard time parsing the comment - might be just me coming back to > this code after such a long time. > > If CNTHCTL_EL2 only affects EL1 when running nVHE, and the else branch deals > with the nVHE case, why isn't CNTHCTL_EL2 restored? Because it has no impact at all? As in nothing? Niente? Rien? Zilch? We enter the guest's EL2, so why would we bother with restoring a guest register that has no influence on what we run? > > As for the 'only' part of the comment: when E2H=1, bits 10 and 11, EL1PCTEN and > EL1PTEN (why isn't this named EL1PCEN if it does the same thing as bit 1 when > E2H=0?), trap EL1 and EL0 accesses to physical counter and timer registers. > > Or 'only' in this context means only EL1, and not EL2 also? None of this makes any sense to me. I don't understand your E2H consideration, nor your digression on the meaning of the word 'only'. Look at the architecture. Do you see *ANY* bit in CNTHCTL_EL2 having *ANY* influence on EL2 when HCR_EL2.E2H=0? Don't you then come to the conclusion that CNTHCTL_EL2 only affects EL1? But surely you've spotted something I can't see, and I must be specially thick today... Please enlighten me. M.
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 1579a3c08a36b..d67628d01bf5e 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); } -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, + u64 mpidr) { - write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(mpidr, vmpidr_el2); if (has_vhe() || !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 29305022bc048..dba101565de36 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) { - __sysreg_restore_el1_state(ctxt); + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); __sysreg_restore_common_state(ctxt); __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index e12bd7d6d2dce..e0df14ead2657 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -15,6 +15,108 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_nested.h> +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) +{ + /* These registers are common with EL1 */ + __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); + __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); + + __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); + __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); + __vcpu_sys_reg(vcpu, AFSR1_EL2) = 
read_sysreg_el1(SYS_AFSR1); + __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); + __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); + __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); + __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); + __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + + /* + * In VHE mode those registers are compatible between EL1 and EL2, + * and the guest uses the _EL1 versions on the CPU naturally. + * So we save them into their _EL2 versions here. + * For nVHE mode we trap accesses to those registers, so our + * _EL2 copy in sys_regs[] is always up-to-date and we don't need + * to save anything here. + */ + if (vcpu_el2_e2h_is_set(vcpu)) { + u64 val; + + /* + * We don't save CPTR_EL2, as accesses to CPACR_EL1 + * are always trapped, ensuring that the in-memory + * copy is always up-to-date. A small blessing... + */ + __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); + __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); + __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); + __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + + /* + * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where + * the interesting CNTHCTL_EL2 bits live. So preserve these + * bits when reading back the guest-visible value. 
+ */ + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + } + + __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); + __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); + __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); +} + +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* These registers are common with EL1 */ + write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1); + write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1); + + write_sysreg(read_cpuid_id(), vpidr_el2); + write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2); + write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR); + + if (vcpu_el2_e2h_is_set(vcpu)) { + /* + * In VHE mode those registers are compatible between + * EL1 and EL2. + */ + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL); + } else { + /* + * CNTHCTL_EL2 only affects EL1 when running nVHE, so + * no need to restore it. 
+ */ + val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2)); + write_sysreg_el1(val, SYS_SCTLR); + val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2)); + write_sysreg_el1(val, SYS_CPACR); + val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2)); + write_sysreg_el1(val, SYS_TTBR0); + val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2)); + write_sysreg_el1(val, SYS_TCR); + } + + write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2), SYS_FAR); + write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR); +} + /* * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and * pstate, which are handled as part of the el2 return state) on every @@ -66,6 +168,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; + u64 mpidr; host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); @@ -89,7 +192,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) */ __sysreg32_restore_state(vcpu); __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); + + if (unlikely(__is_hyp_ctxt(guest_ctxt))) { + __sysreg_restore_vel2_state(vcpu); + } else { + if (vcpu_has_nv(vcpu)) { + /* + * Only set VPIDR_EL2 for nested VMs, as this is the + * only time it changes. We'll restore the MIDR_EL1 + * view on put. + */ + write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2); + + /* + * As we're restoring a nested guest, set the value + * provided by the guest hypervisor. 
+ */ + mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2); + } else { + mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1); + } + + __sysreg_restore_el1_state(guest_ctxt, mpidr); + } vcpu_set_flag(vcpu, SYSREGS_ON_CPU); } @@ -112,12 +237,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); - __sysreg_save_el1_state(guest_ctxt); + if (unlikely(__is_hyp_ctxt(guest_ctxt))) + __sysreg_save_vel2_state(vcpu); + else + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); __sysreg32_save_state(vcpu); /* Restore host user state */ __sysreg_restore_user_state(host_ctxt); + /* If leaving a nesting guest, restore MPIDR_EL1 default view */ + if (vcpu_has_nv(vcpu)) + write_sysreg(read_cpuid_id(), vpidr_el2); + vcpu_clear_flag(vcpu, SYSREGS_ON_CPU); }