Message ID | 20230209175820.1939006-9-maz@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Prefix patches for NV support | expand |
Hi Marc, On Thu, Feb 09, 2023 at 05:58:10PM +0000, Marc Zyngier wrote: > From: Jintack Lim <jintack.lim@linaro.org> > > ARM v8.3 introduces a new bit in the HCR_EL2, which is the NV bit. When > this bit is set, accessing EL2 registers in EL1 traps to EL2. In > addition, executing the following instructions in EL1 will trap to EL2: > tlbi, at, eret, and msr/mrs instructions to access SP_EL1. Most of the > instructions that trap to EL2 with the NV bit were undef at EL1 prior to > ARM v8.3. The only instruction that was not undef is eret. > > This patch sets up a handler for EL2 registers and SP_EL1 register > accesses at EL1. The host hypervisor keeps those register values in > memory, and will emulate their behavior. > > This patch doesn't set the NV bit yet. It will be set in a later patch > once nested virtualization support is completed. > > Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk> > Signed-off-by: Jintack Lim <jintack.lim@linaro.org> > [maz: EL2_REG() macros] > Signed-off-by: Marc Zyngier <maz@kernel.org> [..] > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index c6cbfe6b854b..1e6ae3b2e6dd 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -24,6 +24,7 @@ > #include <asm/kvm_emulate.h> > #include <asm/kvm_hyp.h> > #include <asm/kvm_mmu.h> > +#include <asm/kvm_nested.h> > #include <asm/perf_event.h> > #include <asm/sysreg.h> > > @@ -102,6 +103,18 @@ static u32 get_ccsidr(u32 csselr) > return ccsidr; > } > > +static bool access_rw(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + vcpu_write_sys_reg(vcpu, p->regval, r->reg); > + else > + p->regval = vcpu_read_sys_reg(vcpu, r->reg); > + > + return true; > +} > + > /* > * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). > */ > @@ -260,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, > return read_zero(vcpu, p); > } > > +static bool trap_undef(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + kvm_inject_undefined(vcpu); > + return false; > +} > + > /* > * ARMv8.1 mandates at least a trivial LORegion implementation, where all the > * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0 > @@ -370,12 +391,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, > struct sys_reg_params *p, > const struct sys_reg_desc *r) > { > - if (p->is_write) { > - vcpu_write_sys_reg(vcpu, p->regval, r->reg); > + access_rw(vcpu, p, r); > + if (p->is_write) > vcpu_set_flag(vcpu, DEBUG_DIRTY); > - } else { > - p->regval = vcpu_read_sys_reg(vcpu, r->reg); > - } > > trace_trap_reg(__func__, r->reg, p->is_write, p->regval); > > @@ -1446,6 +1464,24 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, > .visibility = mte_visibility, \ > } > > +static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, > + const struct sys_reg_desc *rd) > +{ > + if (vcpu_has_nv(vcpu)) > + return 0; > + > + return REG_HIDDEN; > +} > + > +#define EL2_REG(name, acc, rst, v) { \ > + SYS_DESC(SYS_##name), \ > + .access = acc, \ > + .reset = rst, \ > + .reg = name, \ > + .visibility = el2_visibility, \ > + .val = v, \ > +} > + > /* sys_reg_desc initialiser for known cpufeature ID registers */ > #define ID_SANITISED(name) { \ > SYS_DESC(SYS_##name), \ > @@ -1490,6 +1526,18 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, > .visibility = raz_visibility, \ > } > > +static bool access_sp_el1(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; > + else > + p->regval = __vcpu_sys_reg(vcpu, SP_EL1); > + > + return true; > +} > + > /* > * Architected system registers. > * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 > @@ -1913,9 +1961,50 @@ static const struct sys_reg_desc sys_reg_descs[] = { > { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper, > .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, > > + EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0), > + EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0), > + EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), > + EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), > + EL2_REG(HCR_EL2, access_rw, reset_val, 0), > + EL2_REG(MDCR_EL2, access_rw, reset_val, 0), > + EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ), > + EL2_REG(HSTR_EL2, access_rw, reset_val, 0), > + EL2_REG(HACR_EL2, access_rw, reset_val, 0), > + > + EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), > + EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), > + EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), > + EL2_REG(VTTBR_EL2, access_rw, reset_val, 0), > + EL2_REG(VTCR_EL2, access_rw, reset_val, 0), > + > { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, > + EL2_REG(SPSR_EL2, access_rw, reset_val, 0), > + EL2_REG(ELR_EL2, access_rw, reset_val, 0), > + { SYS_DESC(SYS_SP_EL1), access_sp_el1}, > + > { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, > + EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), > + EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), > + EL2_REG(ESR_EL2, access_rw, reset_val, 0), > { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, > + > + EL2_REG(FAR_EL2, access_rw, reset_val, 0), > + EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), > + > + EL2_REG(MAIR_EL2, access_rw, reset_val, 0), > + EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), > + > + EL2_REG(VBAR_EL2, access_rw, reset_val, 0), > + EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), > + { SYS_DESC(SYS_RMR_EL2), trap_undef }, > + > + EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), > + EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), > + > + EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), > + EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), > + > + EL2_REG(SP_EL2, NULL, reset_unknown, 0), > }; > > static bool trap_dbgdidr(struct kvm_vcpu *vcpu, > -- > 2.34.1 > > I'm having an issue with this commit where a VCPU is getting a CNTVOFF_EL2 set to 0, so it sees the same time as the host system, and the other VCPU has the correct offset. This is due to the new line added in this commit: EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), Here is an excerpt of the dmesg: [ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x000f0510] [ 0.000000] Linux version 5.14.0-rc1-00011-gc2b59ec4a322-dirty SMP PREEMPT [ 0.000000] Machine model: linux,dummy-virt [ 0.000000] efi: UEFI not found. [ 0.000000] earlycon: ns16550a0 at MMIO 0x0000000001000000 (options '') [ 0.000000] printk: bootconsole [ns16550a0] enabled [ 0.000000] NUMA: No NUMA configuration found [ 0.000000] NUMA: Faking a node at [mem 0x0000000080000000-0x0000000093ffffff] [ 0.000000] NUMA: NODE_DATA [mem 0x93f60c00-0x93f62fff] [ 0.000000] Zone ranges: [ 0.000000] DMA [mem 0x0000000080000000-0x0000000093ffffff] [ 0.000000] DMA32 empty [ 0.000000] Normal empty [ 0.000000] Movable zone start for each node [ 0.000000] Early memory node ranges [ 0.000000] node 0: [mem 0x0000000080000000-0x0000000093ffffff] [ 0.000000] Initmem setup node 0 [mem 0x0000000080000000-0x0000000093ffffff] [ 0.000000] On node 0, zone DMA: 16384 pages in unavailable ranges [ 0.000000] cma: Reserved 32 MiB at 0x0000000091800000 [ 0.000000] psci: probing for conduit method from DT. [ 0.000000] psci: PSCIv1.1 detected in firmware. [ 0.000000] psci: Using standard PSCI v0.2 function IDs [ 0.000000] psci: Trusted OS migration not required [ 0.000000] psci: SMC Calling Convention v1.1 [ 0.000000] smccc: KVM: hypervisor services detected (0x00000000 0x00000000 0x00000000 0x00000003) [ 0.000000] percpu: Embedded 33 pages/cpu s96088 r8192 d30888 u135168 [ 0.000000] Detected PIPT I-cache on CPU0 [ 0.000000] CPU features: detected: Branch Target Identification [ 0.000000] CPU features: detected: GIC system register CPU interface [ 0.000000] CPU features: detected: Spectre-v4 [ 0.000000] CPU features: kernel page table isolation forced ON by KASLR [ 0.000000] CPU features: detected: Kernel page table isolation (KPTI) [..] [ 0.516017] fsl-mc MSI: msic domain created [ 0.544449] EFI services will not be available. [ 0.575385] smp: Bringing up secondary CPUs ... [ 0.658517] smp: Brought up 1 node, 2 CPUs [ 0.849724] SMP: Total of 2 processors activated. [ 0.893825] CPU features: detected: 32-bit EL0 Support [ 0.935321] CPU features: detected: 32-bit EL1 Support [ 0.989719] CPU features: detected: ARMv8.4 Translation Table Level [ 1.049576] CPU features: detected: Data cache clean to the PoU not required for I/D coherence [ 1.108269] CPU features: detected: Common not Private translations [ 1.151863] CPU features: detected: CRC32 instructions [ 1.186088] CPU features: detected: RCpc load-acquire (LDAPR) [ 1.225008] CPU features: detected: LSE atomic instructions [ 1.262173] CPU features: detected: Privileged Access Never [ 1.299687] CPU features: detected: RAS Extension Support [ 1.337388] CPU features: detected: Random Number Generator [ 1.374957] CPU features: detected: Speculation barrier (SB) [ 1.412860] CPU features: detected: Stage-2 Force Write-Back [ 1.458967] CPU features: detected: TLB range maintenance instructions [ 1.503504] CPU features: detected: Speculative Store Bypassing Safe (SSBS) [ 6.740431] CPU: All CPU(s) started at EL1 [ 6.779526] alternatives: patching kernel code [ 1685.522538] devtmpfs: initialized [ 1685.591049] KASLR enabled [ 1685.614942] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns [ 1685.686698] futex hash table entries: 512 (order: 3, 32768 bytes, linear) [ 1685.748071] pinctrl core: initialized pinctrl subsystem [ 1685.809348] DMI not present or invalid. [ 1685.854306] NET: Registered PF_NETLINK/PF_ROUTE protocol family [ 1685.924942] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations [ 1685.981774] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA pool for atomic allocations [ 1686.042387] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations [ 1686.099864] audit: initializing netlink subsys (disabled) [ 1686.152647] audit: type=2000 audit(5.488:1): state=initialized audit_enabled=0 res=1 [ 7.525335] thermal_sys: Registered thermal governor 'step_wise' [ 7.565113] thermal_sys: Registered thermal governor 'power_allocator' [ 7.609143] cpuidle: using governor menu [ 7.681526] NET: Registered PF_QIPCRTR protocol family [ 7.720877] hw-breakpoint: found 6 breakpoint and 4 watchpoint registers. [ 7.770844] ASID allocator initialised with 32768 entries [ 7.816662] Serial: AMBA PL011 UART driver [ 1686.613910] HugeTLB registered 1.00 GiB page size, pre-allocated 0 pages [ 1686.666303] HugeTLB registered 32.0 MiB page size, pre-allocated 0 pages [ 1686.725358] HugeTLB registered 2.00 MiB page size, pre-allocated 0 pages [ 1686.772979] HugeTLB registered 64.0 KiB page size, pre-allocated 0 pages [..] The flow of execution looks like this: KVM_CREATE_VCPU 0 (VMM) -> kvm_timer_vcpu_init -> update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=kvm_phys_timer_read) KVM_ARM_VCPU_INIT (VMM) -> kvm_arch_vcpu_ioctl_vcpu_init -> kvm_vcpu_set_target -> kvm_reset_vcpu -> kvm_reset_sys_regs (VCPU0.CNTVOFF_EL2=0) KVM_CREATE_VCPU 1 (VMM) -> kvm_timer_vcpu_init -> update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=VCPU1.CNTVOFF_EL2=kvm_phys_timer_read) KVM_ARM_VCPU_INIT (VMM) -> kvm_arch_vcpu_ioctl_vcpu_init -> kvm_vcpu_set_target -> kvm_reset_vcpu -> kvm_reset_sys_regs (VCPU1.CNTVOFF_EL2=0) At this point VCPU0 has CNTVOFF_EL2 == kvm_phys_timer_read, and VCPU1 has CNTVOFF_EL2 == 0. The VCPUs having different CNTVOFF_EL2 valuess is just a symptom of the fact that CNTVOFF_EL2 is now reset in kvm_reset_sys_regs. This is with linux kvmarm/next [1], kvmtool [2], qemu [3] (I also saw similar behaviour on FVP). qemu-system-aarch64 -M virt \ -machine virtualization=true \ -machine virt,gic-version=3 \ -machine mte=off \ -cpu max,pauth=off -smp 2 -m 12144 \ -drive if=none,format=qcow2,file=disk.img,id=blk0 \ -device virtio-blk-pci,drive=blk0 \ -nographic \ -device virtio-net-pci,netdev=net0 \ -netdev user,id=net0,hostfwd=tcp::8024-:22 \ -kernel ../linux/arch/arm64/boot/Image \ -append "root=/dev/vda2" And then running kvmtool: lkvm run -k Image -p earlycon Hopefully that is all that is needed to reproduce it. The following patch gets it booting again, but I'm sure it's not the right fix. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 53749d3a0996..1a1abda6af74 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1587,6 +1587,14 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static void nv_reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + if (vcpu_has_nv(vcpu)) + __vcpu_sys_reg(vcpu, r->reg) = r->val; +} + static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -2190,7 +2198,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), - EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), + EL2_REG(CNTVOFF_EL2, access_rw, nv_reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078), Thanks, Joey [1] https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git/commit/?h=next&id=96a4627dbbd48144a65af936b321701c70876026 [2] e17d182ad3f797f01947fc234d95c96c050c534b [3] 99d6b11b5b44d7dd64f4cb1973184e40a4a174f8
Hi Joey, On Fri, Feb 24, 2023 at 05:39:15PM +0000, Joey Gouly wrote: > I'm having an issue with this commit where a VCPU is getting a CNTVOFF_EL2 set > to 0, so it sees the same time as the host system, and the other VCPU has the > correct offset. Yikes! > The flow of execution looks like this: > KVM_CREATE_VCPU 0 (VMM) -> > kvm_timer_vcpu_init -> > update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=kvm_phys_timer_read) > KVM_ARM_VCPU_INIT (VMM) -> > kvm_arch_vcpu_ioctl_vcpu_init -> > kvm_vcpu_set_target -> > kvm_reset_vcpu -> > kvm_reset_sys_regs (VCPU0.CNTVOFF_EL2=0) > > KVM_CREATE_VCPU 1 (VMM) -> > kvm_timer_vcpu_init -> > update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=VCPU1.CNTVOFF_EL2=kvm_phys_timer_read) > KVM_ARM_VCPU_INIT (VMM) -> > kvm_arch_vcpu_ioctl_vcpu_init -> > kvm_vcpu_set_target -> > kvm_reset_vcpu -> > kvm_reset_sys_regs (VCPU1.CNTVOFF_EL2=0) > > At this point VCPU0 has CNTVOFF_EL2 == kvm_phys_timer_read, and VCPU1 > has CNTVOFF_EL2 == 0. > > The VCPUs having different CNTVOFF_EL2 valuess is just a symptom of the fact that > CNTVOFF_EL2 is now reset in kvm_reset_sys_regs. Right, and the fundamental problem at hand is that we used CNTVOFF_EL2 to stash the _host's_ offset even though we are trying to change the meaning of it to be part of the virtual EL2's context. > The following patch gets it booting again, but I'm sure it's not the right fix. I'd rather we just break the host away from using the shadow reg altogether and separately track the host offset. As it so happens Marc has a patch that does exactly that [*]. Marc, do you want to resend that patch in isolation addressing the feedback and link to this bug report? [*] https://lore.kernel.org/kvmarm/20230216142123.2638675-6-maz@kernel.org/
On Fri, 24 Feb 2023 18:36:23 +0000, Oliver Upton <oliver.upton@linux.dev> wrote: > > Hi Joey, > > On Fri, Feb 24, 2023 at 05:39:15PM +0000, Joey Gouly wrote: > > I'm having an issue with this commit where a VCPU is getting a CNTVOFF_EL2 set > > to 0, so it sees the same time as the host system, and the other VCPU has the > > correct offset. > > Yikes! > > > The flow of execution looks like this: > > KVM_CREATE_VCPU 0 (VMM) -> > > kvm_timer_vcpu_init -> > > update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=kvm_phys_timer_read) > > KVM_ARM_VCPU_INIT (VMM) -> > > kvm_arch_vcpu_ioctl_vcpu_init -> > > kvm_vcpu_set_target -> > > kvm_reset_vcpu -> > > kvm_reset_sys_regs (VCPU0.CNTVOFF_EL2=0) > > > > KVM_CREATE_VCPU 1 (VMM) -> > > kvm_timer_vcpu_init -> > > update_vtimer_cntvoff (VCPU0.CNTVOFF_EL2=VCPU1.CNTVOFF_EL2=kvm_phys_timer_read) > > KVM_ARM_VCPU_INIT (VMM) -> > > kvm_arch_vcpu_ioctl_vcpu_init -> > > kvm_vcpu_set_target -> > > kvm_reset_vcpu -> > > kvm_reset_sys_regs (VCPU1.CNTVOFF_EL2=0) > > > > At this point VCPU0 has CNTVOFF_EL2 == kvm_phys_timer_read, and VCPU1 > > has CNTVOFF_EL2 == 0. > > > > The VCPUs having different CNTVOFF_EL2 valuess is just a symptom of the fact that > > CNTVOFF_EL2 is now reset in kvm_reset_sys_regs. > > Right, and the fundamental problem at hand is that we used CNTVOFF_EL2 > to stash the _host's_ offset even though we are trying to change the > meaning of it to be part of the virtual EL2's context. What is driving me nuts is that I have never managed to reproduce the problem so far. I guess I have too much crap in my tree to spot it... > > > The following patch gets it booting again, but I'm sure it's not the right fix. > > I'd rather we just break the host away from using the shadow reg > altogether and separately track the host offset. As it so happens Marc > has a patch that does exactly that [*]. Yup, might as well fix that *and* kill the lock inversion issue in one go... > Marc, do you want to resend that patch in isolation addressing the > feedback and link to this bug report? > > [*] https://lore.kernel.org/kvmarm/20230216142123.2638675-6-maz@kernel.org/ Joey, could you please give that patch a go, just to be on the safe side? Thanks, M.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1312fb48f18b..d13f168abe4b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -490,23 +490,51 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) +#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) + #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) +#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) +#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) +#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3) #define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) #define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) +#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7) + +#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0) +#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1) +#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2) +#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) +#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) + #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) +#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) +#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) #define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) -#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) +#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4) + +#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) +#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) + +#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) +#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) +#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2) +#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) #define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) @@ -548,6 +576,12 @@ #define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) +#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) +#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) + +#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) +#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) + /* VHE encodings for architectural EL0/1 system registers */ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) @@ -570,6 +604,8 @@ #define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) +#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c6cbfe6b854b..1e6ae3b2e6dd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -24,6 +24,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_nested.h> #include <asm/perf_event.h> #include <asm/sysreg.h> @@ -102,6 +103,18 @@ static u32 get_ccsidr(u32 csselr) return ccsidr; } +static bool access_rw(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + vcpu_write_sys_reg(vcpu, p->regval, r->reg); + else + p->regval = vcpu_read_sys_reg(vcpu, r->reg); + + return true; +} + /* * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). */ @@ -260,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_undef(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + kvm_inject_undefined(vcpu); + return false; +} + /* * ARMv8.1 mandates at least a trivial LORegion implementation, where all the * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0 @@ -370,12 +391,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (p->is_write) { - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + access_rw(vcpu, p, r); + if (p->is_write) vcpu_set_flag(vcpu, DEBUG_DIRTY); - } else { - p->regval = vcpu_read_sys_reg(vcpu, r->reg); - } trace_trap_reg(__func__, r->reg, p->is_write, p->regval); @@ -1446,6 +1464,24 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = mte_visibility, \ } +static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu)) + return 0; + + return REG_HIDDEN; +} + +#define EL2_REG(name, acc, rst, v) { \ + SYS_DESC(SYS_##name), \ + .access = acc, \ + .reset = rst, \ + .reg = name, \ + .visibility = el2_visibility, \ + .val = v, \ +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1490,6 +1526,18 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = raz_visibility, \ } +static bool access_sp_el1(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + else + p->regval = __vcpu_sys_reg(vcpu, SP_EL1); + + return true; +} + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -1913,9 +1961,50 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper, .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, + EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0), + EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0), + EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), + EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), + EL2_REG(HCR_EL2, access_rw, reset_val, 0), + EL2_REG(MDCR_EL2, access_rw, reset_val, 0), + EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ), + EL2_REG(HSTR_EL2, access_rw, reset_val, 0), + EL2_REG(HACR_EL2, access_rw, reset_val, 0), + + EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), + EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), + EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), + EL2_REG(VTTBR_EL2, access_rw, reset_val, 0), + EL2_REG(VTCR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + EL2_REG(SPSR_EL2, access_rw, reset_val, 0), + EL2_REG(ELR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_SP_EL1), access_sp_el1}, + { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, + EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), + EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), + EL2_REG(ESR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, + + EL2_REG(FAR_EL2, access_rw, reset_val, 0), + EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), + + EL2_REG(MAIR_EL2, access_rw, reset_val, 0), + EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), + + EL2_REG(VBAR_EL2, access_rw, reset_val, 0), + EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_RMR_EL2), trap_undef }, + + EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), + EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), + + EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), + EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), + + EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; static bool trap_dbgdidr(struct kvm_vcpu *vcpu,