Message ID | 20190621093843.220980-51-marc.zyngier@arm.com (mailing list archive)
---|---
State | New, archived
Series | KVM: arm64: ARMv8.3 Nested Virtualization support
On 6/21/19 10:38 AM, Marc Zyngier wrote: > From: Jintack Lim <jintack@cs.columbia.edu> > > When entering a nested VM, we set up the hypervisor control interface > based on what the guest hypervisor has set. Especially, we investigate > each list register written by the guest hypervisor whether HW bit is > set. If so, we translate hw irq number from the guest's point of view > to the real hardware irq number if there is a mapping. > > Signed-off-by: Jintack Lim <jintack@cs.columbia.edu> > [Rewritten to support GICv3 instead of GICv2] > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > [Redesigned execution flow around vcpu load/put] > Signed-off-by: Christoffer Dall <christoffer.dall@arm.com> > --- > arch/arm/include/asm/kvm_emulate.h | 1 + > arch/arm/include/asm/kvm_host.h | 6 +- > arch/arm64/include/asm/kvm_host.h | 5 +- > arch/arm64/kvm/Makefile | 1 + > arch/arm64/kvm/nested.c | 10 ++ > arch/arm64/kvm/sys_regs.c | 178 ++++++++++++++++++++++++++++- > include/kvm/arm_vgic.h | 18 +++ > virt/kvm/arm/arm.c | 7 +- > virt/kvm/arm/vgic/vgic-v3-nested.c | 177 ++++++++++++++++++++++++++++ > virt/kvm/arm/vgic/vgic-v3.c | 28 +++++ > virt/kvm/arm/vgic/vgic.c | 32 ++++++ > 11 files changed, 456 insertions(+), 7 deletions(-) > create mode 100644 virt/kvm/arm/vgic/vgic-v3-nested.c > > diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h > index 865ce545b465..a53f19041e16 100644 > --- a/arch/arm/include/asm/kvm_emulate.h > +++ b/arch/arm/include/asm/kvm_emulate.h > @@ -334,5 +334,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, > static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {} > > static inline bool is_hyp_ctxt(struct kvm_vcpu *vcpu) { return false; } > +static inline int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) { BUG(); } > > #endif /* __ARM_KVM_EMULATE_H__ */ > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h > index cc761610e41e..d6923ed55796 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -35,10 +35,12 @@ > #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS > #endif > > +/* KVM_REQ_GUEST_HYP_IRQ_PENDING is actually unused */ > #define KVM_REQ_SLEEP \ > KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) > -#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) > -#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) > +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) > +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) > +#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(3) > > DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index e0fe9acb46bf..e2e44cc650bf 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -53,8 +53,9 @@ > > #define KVM_REQ_SLEEP \ > KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) > -#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) > -#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) > +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) > +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) > +#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(3) > > DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); > > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > index f11bd8b0d837..045a8f18f465 100644 > --- a/arch/arm64/kvm/Makefile > +++ b/arch/arm64/kvm/Makefile > @@ -38,3 +38,4 @@ kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o > > kvm-$(CONFIG_KVM_ARM_HOST) += nested.o > kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o > 
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3-nested.o > diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c > index 214d59019935..df2db9ab7cfb 100644 > --- a/arch/arm64/kvm/nested.c > +++ b/arch/arm64/kvm/nested.c > @@ -539,3 +539,13 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) > kvm->arch.nested_mmus_size = 0; > kvm_free_stage2_pgd(&kvm->arch.mmu); > } > + > +bool vgic_state_is_nested(struct kvm_vcpu *vcpu) > +{ > + bool imo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO; > + bool fmo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FMO; > + > + WARN(imo != fmo, "Separate virtual IRQ/FIQ settings not supported\n"); > + > + return nested_virt_in_use(vcpu) && imo && fmo && !is_hyp_ctxt(vcpu); > +} > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 2031a59fcf49..ba3bcd29c02d 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -26,6 +26,8 @@ > #include <linux/printk.h> > #include <linux/uaccess.h> > > +#include <linux/irqchip/arm-gic-v3.h> > + > #include <asm/cacheflush.h> > #include <asm/cputype.h> > #include <asm/debug-monitors.h> > @@ -505,6 +507,18 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, > return true; > } > > +/* > + * The architecture says that non-secure write accesses to this register from > + * EL1 are trapped to EL2, if either: > + * - HCR_EL2.FMO==1, or > + * - HCR_EL2.IMO==1 > + */ > +static bool sgi_traps_to_vel2(struct kvm_vcpu *vcpu) > +{ > + return !vcpu_mode_el2(vcpu) && > + !!(__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO)); > +} > + > /* > * Trap handler for the GICv3 SGI generation system register. > * Forward the request to the VGIC emulation. > @@ -520,6 +534,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, > if (!p->is_write) > return read_from_write_only(vcpu, p, r); > > + if (sgi_traps_to_vel2(vcpu)) { > + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu)); > + return false; > + } > + > /* > * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates > * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group, > @@ -563,7 +582,13 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, > if (p->is_write) > return ignore_write(vcpu, p); > > - p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; > + if (p->Op1 == 4) { /* ICC_SRE_EL2 */ > + p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | > + ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB); > + } else { /* ICC_SRE_EL1 */ > + p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; > + } > + > return true; > } > > @@ -1793,6 +1818,122 @@ static bool access_id_aa64pfr0_el1(struct kvm_vcpu *v, > return true; > } > > +static bool access_gic_apr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3; > + u32 index, *base; > + > + index = r->Op2; > + if (r->CRm == 8) > + base = cpu_if->vgic_ap0r; > + else > + base = cpu_if->vgic_ap1r; > + > + if (p->is_write) > + base[index] = p->regval; > + else > + p->regval = base[index]; > + > + return true; > +} > + > +static bool access_gic_hcr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3; > + > + if (p->is_write) > + cpu_if->vgic_hcr = p->regval; Probably because there's only enough NV support to run an L1 KVM hypervisor + L2 guest, but the L1 guest ICH_HCR_EL2 value is written to the register unmodified in vgic_v3_load, and there's no support for forwarding traps that can be 
configured via ICH_HCR_EL2 (or even handling some traps - ICV_CTLR_EL1 can be trapped when ICH_HCR_EL2.TC = 1). > + else > + p->regval = cpu_if->vgic_hcr; > + > + return true; > +} > + > +static bool access_gic_vtr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + return write_to_read_only(vcpu, p, r); > + > + p->regval = kvm_vgic_global_state.ich_vtr_el2; > + > + return true; > +} > + > +static bool access_gic_misr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + return write_to_read_only(vcpu, p, r); > + > + p->regval = vgic_v3_get_misr(vcpu); > + > + return true; > +} > + > +static bool access_gic_eisr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + return write_to_read_only(vcpu, p, r); > + > + p->regval = vgic_v3_get_eisr(vcpu); > + > + return true; > +} > + > +static bool access_gic_elrsr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + return write_to_read_only(vcpu, p, r); > + > + p->regval = vgic_v3_get_elrsr(vcpu); > + > + return true; > +} > + > +static bool access_gic_vmcr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3; > + > + if (p->is_write) > + cpu_if->vgic_vmcr = p->regval; > + else > + p->regval = cpu_if->vgic_vmcr; > + > + return true; > +} > + > +static bool access_gic_lr(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3; > + u32 index; > + > + index = p->Op2; > + if (p->CRm == 13) > + index += 8; > + > + if (p->is_write) > + cpu_if->vgic_lr[index] = p->regval; > + else > + p->regval = cpu_if->vgic_lr[index]; > + > + return true; > +} > + > /* > * Architected system registers. 
> * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 > @@ -2123,6 +2264,41 @@ static const struct sys_reg_desc sys_reg_descs[] = { > { SYS_DESC(SYS_RMR_EL2), access_rw, reset_val, RMR_EL2, 0 }, > { SYS_DESC(SYS_VDISR_EL2), trap_undef }, > > + { SYS_DESC(SYS_ICH_AP0R0_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP0R1_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP0R2_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP0R3_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP1R0_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP1R1_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP1R2_EL2), access_gic_apr }, > + { SYS_DESC(SYS_ICH_AP1R3_EL2), access_gic_apr }, > + > + { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre }, > + > + { SYS_DESC(SYS_ICH_HCR_EL2), access_gic_hcr }, > + { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr }, > + { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr }, > + { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr }, > + { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr }, > + { SYS_DESC(SYS_ICH_VMCR_EL2), access_gic_vmcr }, > + > + { SYS_DESC(SYS_ICH_LR0_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR1_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR2_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR3_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR4_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR5_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR6_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR7_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR8_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR9_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR10_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR11_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR12_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR13_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR14_EL2), access_gic_lr }, > + { SYS_DESC(SYS_ICH_LR15_EL2), access_gic_lr }, > + > { SYS_DESC(SYS_CONTEXTIDR_EL2), access_rw, reset_val, CONTEXTIDR_EL2, 0 }, > { SYS_DESC(SYS_TPIDR_EL2), access_rw, reset_val, TPIDR_EL2, 0 }, > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index 163b132e100e..707fbe627155 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -310,6 +310,15 @@ struct vgic_cpu { > > struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; > > + /* CPU vif control registers for the virtual GICH interface */ > + struct vgic_v3_cpu_if nested_vgic_v3; > + > + /* > + * The shadow vif control register loaded to the hardware when > + * running a nested L2 guest with the virtual IMO/FMO bit set. 
> + */ > + struct vgic_v3_cpu_if shadow_vgic_v3; > + > raw_spinlock_t ap_list_lock; /* Protects the ap_list */ > > /* > @@ -366,6 +375,13 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); > void kvm_vgic_load(struct kvm_vcpu *vcpu); > void kvm_vgic_put(struct kvm_vcpu *vcpu); > > +void vgic_v3_load_nested(struct kvm_vcpu *vcpu); > +void vgic_v3_put_nested(struct kvm_vcpu *vcpu); > +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); > +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu); > +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu); > +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu); > + > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) > #define vgic_initialized(k) ((k)->arch.vgic.initialized) > #define vgic_ready(k) ((k)->arch.vgic.ready) > @@ -411,4 +427,6 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, > void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu); > void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu); > > +bool vgic_state_is_nested(struct kvm_vcpu *vcpu); > + > #endif /* __KVM_ARM_VGIC_H */ > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c > index ca10a11e044e..ddcab58ae440 100644 > --- a/virt/kvm/arm/arm.c > +++ b/virt/kvm/arm/arm.c > @@ -634,6 +634,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) > * that a VCPU sees new virtual interrupts. > */ > kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); > + > + if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu)) > + kvm_inject_nested_irq(vcpu); > } > } > > @@ -680,10 +683,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > */ > cond_resched(); > > - update_vmid(&vcpu->arch.hw_mmu->vmid); > - > check_vcpu_requests(vcpu); > > + update_vmid(&vcpu->arch.hw_mmu->vmid); Was this change made to prevent having a mmu with a valid vmid_gen, but which was never actually run? Or something else entirely? > + > /* > * Preparing the interrupts to be injected also > * involves poking the GIC, which must be done in a > diff --git a/virt/kvm/arm/vgic/vgic-v3-nested.c b/virt/kvm/arm/vgic/vgic-v3-nested.c > new file mode 100644 > index 000000000000..6fb81dfbb679 > --- /dev/null > +++ b/virt/kvm/arm/vgic/vgic-v3-nested.c > @@ -0,0 +1,177 @@ > +#include <linux/cpu.h> > +#include <linux/kvm.h> > +#include <linux/kvm_host.h> > +#include <linux/interrupt.h> > +#include <linux/io.h> > +#include <linux/uaccess.h> > + > +#include <linux/irqchip/arm-gic-v3.h> > + > +#include <asm/kvm_emulate.h> > +#include <asm/kvm_arm.h> > +#include <kvm/arm_vgic.h> > + > +#include "vgic.h" > + > +static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu) > +{ > + return &vcpu->arch.vgic_cpu.nested_vgic_v3; > +} Not especially relevant at this stage, but the nested_vgic_v3 member is accesses in several other places in sys_regs.c and vgic-v3.c. Perhaps this function could be moved to include/kvm/arm_vgic.h in a future revision. 
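
To make the suggestion concrete, here is a rough sketch (not tested, simply lifted from the accessors this patch adds to vgic-v3-nested.c) of what could move into include/kvm/arm_vgic.h, so that the open-coded &vcpu->arch.vgic_cpu.nested_vgic_v3 dereferences in the sys_regs.c handlers and in vgic_v3_load()/vgic_v3_put() could share it:

	/* Sketch only: the same helpers as in vgic-v3-nested.c, hoisted to the header */
	static inline struct vgic_v3_cpu_if *vcpu_nested_if(struct kvm_vcpu *vcpu)
	{
		return &vcpu->arch.vgic_cpu.nested_vgic_v3;
	}

	static inline struct vgic_v3_cpu_if *vcpu_shadow_if(struct kvm_vcpu *vcpu)
	{
		return &vcpu->arch.vgic_cpu.shadow_vgic_v3;
	}

With that, access_gic_hcr() and friends could start with "struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);" instead of reaching into vcpu->arch.vgic_cpu directly.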
> + > +static inline struct vgic_v3_cpu_if *vcpu_shadow_if(struct kvm_vcpu *vcpu) > +{ > + return &vcpu->arch.vgic_cpu.shadow_vgic_v3; > +} > + > +static inline bool lr_triggers_eoi(u64 lr) > +{ > + return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI); > +} > + > +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + u16 reg = 0; > + int i; > + > + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) { > + if (lr_triggers_eoi(cpu_if->vgic_lr[i])) > + reg |= BIT(i); > + } > + > + return reg; > +} > + > +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + u16 reg = 0; > + int i; > + > + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) { > + if (!(cpu_if->vgic_lr[i] & ICH_LR_STATE)) > + reg |= BIT(i); > + } > + > + return reg; > +} > + > +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + int nr_lr = kvm_vgic_global_state.nr_lr; > + u64 reg = 0; > + > + if (vgic_v3_get_eisr(vcpu)) > + reg |= ICH_MISR_EOI; > + > + if (cpu_if->vgic_hcr & ICH_HCR_UIE) { > + int used_lrs; > + > + used_lrs = nr_lr - hweight16(vgic_v3_get_elrsr(vcpu)); > + if (used_lrs <= 1) > + reg |= ICH_MISR_U; > + } > + > + /* TODO: Support remaining bits in this register */ > + return reg; > +} > + > +/* > + * For LRs which have HW bit set such as timer interrupts, we modify them to > + * have the host hardware interrupt number instead of the virtual one programmed > + * by the guest hypervisor. > + */ > +static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu); > + struct vgic_irq *irq; > + int i; > + > + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) { > + u64 lr = cpu_if->vgic_lr[i]; > + int l1_irq; > + > + if (!(lr & ICH_LR_HW)) > + goto next; > + > + /* We have the HW bit set */ > + l1_irq = (lr & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; > + irq = vgic_get_irq(vcpu->kvm, vcpu, l1_irq); > + > + if (!irq || !irq->hw) { > + /* There was no real mapping, so nuke the HW bit */ > + lr &= ~ICH_LR_HW; > + if (irq) > + vgic_put_irq(vcpu->kvm, irq); > + goto next; > + } > + > + /* Translate the virtual mapping to the real one */ > + lr &= ~ICH_LR_EOI; /* Why? */ > + lr &= ~ICH_LR_PHYS_ID_MASK; > + lr |= (u64)irq->hwintid << ICH_LR_PHYS_ID_SHIFT; > + vgic_put_irq(vcpu->kvm, irq); > + > +next: > + s_cpu_if->vgic_lr[i] = lr; > + } > + > + s_cpu_if->used_lrs = kvm_vgic_global_state.nr_lr; > +} > + > +/* > + * Change the shadow HWIRQ field back to the virtual value before copying over > + * the entire shadow struct to the nested state. 
> + */ > +static void vgic_v3_fixup_shadow_lr_state(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + struct vgic_v3_cpu_if *s_cpu_if = vcpu_shadow_if(vcpu); > + int lr; > + > + for (lr = 0; lr < kvm_vgic_global_state.nr_lr; lr++) { > + s_cpu_if->vgic_lr[lr] &= ~ICH_LR_PHYS_ID_MASK; > + s_cpu_if->vgic_lr[lr] |= cpu_if->vgic_lr[lr] & ICH_LR_PHYS_ID_MASK; > + } > +} > + > +void vgic_v3_load_nested(struct kvm_vcpu *vcpu) > +{ > + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; > + > + vgic_cpu->shadow_vgic_v3 = vgic_cpu->nested_vgic_v3; > + vgic_v3_create_shadow_lr(vcpu); > + __vgic_v3_restore_state(vcpu_shadow_if(vcpu)); > +} > + > +void vgic_v3_put_nested(struct kvm_vcpu *vcpu) > +{ > + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; > + > + __vgic_v3_save_state(vcpu_shadow_if(vcpu)); > + > + /* > + * Translate the shadow state HW fields back to the virtual ones > + * before copying the shadow struct back to the nested one. > + */ > + vgic_v3_fixup_shadow_lr_state(vcpu); > + vgic_cpu->nested_vgic_v3 = vgic_cpu->shadow_vgic_v3; > +} > + > +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) > +{ > + struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu); > + > + /* > + * If we exit a nested VM with a pending maintenance interrupt from the > + * GIC, then we need to forward this to the guest hypervisor so that it > + * can re-sync the appropriate LRs and sample level triggered interrupts > + * again. > + */ > + if (vgic_state_is_nested(vcpu) && > + (cpu_if->vgic_hcr & ICH_HCR_EN) && > + vgic_v3_get_misr(vcpu)) > + kvm_inject_nested_irq(vcpu); > +} I don't see this function used anywhere, shouldn't it be part of #53 "KVM: arm64: nv: Implement maintenance interrupt forwarding"? > diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c > index 77d23e817756..25edf32c28fb 100644 > --- a/virt/kvm/arm/vgic/vgic-v3.c > +++ b/virt/kvm/arm/vgic/vgic-v3.c > @@ -18,6 +18,7 @@ > #include <kvm/arm_vgic.h> > #include <asm/kvm_hyp.h> > #include <asm/kvm_mmu.h> > +#include <asm/kvm_nested.h> > #include <asm/kvm_asm.h> > > #include "vgic.h" > @@ -298,6 +299,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) > vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | > ICC_SRE_EL1_DFB | > ICC_SRE_EL1_SRE); > + /* > + * If nesting is allowed, force GICv3 onto the nested > + * guests as well. > + */ > + if (nested_virt_in_use(vcpu)) > + vcpu->arch.vgic_cpu.nested_vgic_v3.vgic_sre = vgic_v3->vgic_sre; > vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; > } else { > vgic_v3->vgic_sre = 0; > @@ -660,6 +667,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) > { > struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; > > + /* > + * vgic_v3_load_nested only affects the LRs in the shadow > + * state, so it is fine to pass the nested state around. 
> + */ > + if (vgic_state_is_nested(vcpu)) > + cpu_if = &vcpu->arch.vgic_cpu.nested_vgic_v3; > + > /* > * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen > * is dependent on ICC_SRE_EL1.SRE, and we have to perform the > @@ -672,12 +686,18 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) > > if (has_vhe()) > __vgic_v3_activate_traps(cpu_if); > + > + if (vgic_state_is_nested(vcpu)) > + vgic_v3_load_nested(vcpu); > } > > void vgic_v3_put(struct kvm_vcpu *vcpu) > { > struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; > > + if (vgic_state_is_nested(vcpu)) > + cpu_if = &vcpu->arch.vgic_cpu.shadow_vgic_v3; > + > if (likely(cpu_if->vgic_sre)) > cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); > > @@ -685,4 +705,12 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) > > if (has_vhe()) > __vgic_v3_deactivate_traps(cpu_if); > + > + if (vgic_state_is_nested(vcpu)) > + vgic_v3_put_nested(vcpu); > } > + > +__weak void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) {} > +__weak void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) {} > +__weak void vgic_v3_load_nested(struct kvm_vcpu *vcpu) {} > +__weak void vgic_v3_put_nested(struct kvm_vcpu *vcpu) {} > diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c > index 6953aefecbb6..f32f49b0c803 100644 > --- a/virt/kvm/arm/vgic/vgic.c > +++ b/virt/kvm/arm/vgic/vgic.c > @@ -872,6 +872,10 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > { > int used_lrs; > > + /* If nesting, this is a load/put affair, not flush/sync. */ > + if (vgic_state_is_nested(vcpu)) > + return; > + > WARN_ON(vgic_v4_sync_hwstate(vcpu)); > > /* An empty ap_list_head implies used_lrs == 0 */ > @@ -920,6 +924,29 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) > !vgic_supports_direct_msis(vcpu->kvm)) > return; > > + /* > + * If in a nested state, we must return early. Two possibilities: > + * > + * - If we have any pending IRQ for the guest and the guest > + * expects IRQs to be handled in its virtual EL2 mode (the > + * virtual IMO bit is set) and it is not already running in > + * virtual EL2 mode, then we have to emulate an IRQ > + * exception to virtual EL2. > + * > + * We do that by placing a request to ourselves which will > + * abort the entry procedure and inject the exception at the > + * beginning of the run loop. > + * > + * - Otherwise, do exactly *NOTHING*. The guest state is > + * already loaded, and we can carry on with running it. > + */ > + if (vgic_state_is_nested(vcpu)) { > + if (kvm_vgic_vcpu_pending_irq(vcpu)) > + kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); > + > + return; > + } > + > DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); > > if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { > @@ -1022,3 +1049,8 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) > > return map_is_active; > } > + > +__weak bool vgic_state_is_nested(struct kvm_vcpu *vcpu) > +{ > + return false; > +}
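
One more note, mostly for my own understanding: if I read the two hunks above correctly, the path for a pending L1 interrupt while the L2 state is loaded condenses to roughly the following (excerpted and condensed from this patch, nothing new):

	/* kvm_vgic_flush_hwstate(), virt/kvm/arm/vgic/vgic.c */
	if (vgic_state_is_nested(vcpu)) {
		/*
		 * Virtual IMO/FMO set and not in virtual EL2: ask the run
		 * loop to emulate an IRQ exception to virtual EL2.
		 */
		if (kvm_vgic_vcpu_pending_irq(vcpu))
			kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
		return;
	}

	/* check_vcpu_requests(), virt/kvm/arm/arm.c */
	if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
		kvm_inject_nested_irq(vcpu);

Which also explains why the 32-bit arm stub of kvm_inject_nested_irq() can simply BUG(), and why the kvm_host.h comment marks KVM_REQ_GUEST_HYP_IRQ_PENDING as unused there: on arm, vgic_state_is_nested() is the __weak version returning false, so the request is never made.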