@@ -824,6 +824,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (vcpu_has_nv(vcpu)) {
+ ret = kvm_vgic_vcpu_nv_init(vcpu);
+ if (ret)
+ return ret;
+ }
+
/*
* This needs to happen after any restriction has been applied
* to the feature set.
@@ -180,6 +180,20 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
+int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guard(mutex)(&vcpu->kvm->arch.config_lock);
+
+ /* Cope with vintage userspace. Maybe we should fail instead */
+ if (vcpu->kvm->arch.vgic.maint_irq == 0)
+ vcpu->kvm->arch.vgic.maint_irq = kvm_vgic_global_state.maint_irq;
+ ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.maint_irq, vcpu);
+
+ return ret;
+}
+
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -588,12 +602,20 @@ void kvm_vgic_cpu_down(void)
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
+ struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
+
/*
* We cannot rely on the vgic maintenance interrupt to be
* delivered synchronously. This means we can only use it to
* exit the VM, and we perform the handling of EOIed
* interrupts on the exit path (see vgic_fold_lr_state).
+ *
+ * Of course, NV throws a wrench in this plan, and needs
+ * something special.
*/
+ if (vcpu && vgic_state_is_nested(vcpu))
+ vgic_v3_handle_nested_maint_irq(vcpu);
+
return IRQ_HANDLED;
}
@@ -73,6 +73,24 @@ static DEFINE_PER_CPU(struct shadow_if, shadow_if);
* interrupt. The L0 active state will be cleared by the HW if the L1
* interrupt was itself backed by a HW interrupt.
*
+ * Maintenance Interrupt (MI) management:
+ *
+ * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
+ * used as a handover point between L2 and L1.
+ *
+ * - on delivery of a MI to L0 while L2 is running: make the L1 MI pending,
+ * and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
+ * run and process the MI.
+ *
+ * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
+ * state must be computed at each entry/exit of the guest, much like we do
+ * it for the PMU interrupt.
+ *
+ * - because most of the ICH_*_EL2 registers live in the VNCR page, the
+ * quality of emulation is poor: L1 can setup the vgic so that an MI would
+ * immediately fire, and not observe anything until the next exit. Trying
+ * to read ICH_MISR_EL2 would do the trick, for example.
+ *
* System register emulation:
*
* We get two classes of registers:
@@ -341,3 +359,30 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
shadow_if->lr_map = 0;
}
+
+/*
+ * If we exit a L2 VM with a pending maintenance interrupt from the GIC,
+ * then we need to forward this to L1 so that it can re-sync the appropriate
+ * LRs and sample level triggered interrupts again.
+ */
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
+{
+ bool state = read_sysreg_s(SYS_ICH_MISR_EL2);
+
+ /* This will force a switch back to L1 if the level is high */
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.maint_irq, state, vcpu);
+
+ sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
+}
+
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
+{
+ bool level;
+
+ level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En;
+ if (level)
+ level &= vgic_v3_get_misr(vcpu);
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.maint_irq, level, vcpu);
+}
@@ -878,6 +878,9 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
return;
}
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -921,6 +924,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
*
* - Otherwise, do exactly *NOTHING*. The guest state is
* already loaded, and we can carry on with running it.
+ *
+ * If we have NV, but are not in a nested state, compute the
+ * maintenance interrupt state, as it may fire.
*/
if (vgic_state_is_nested(vcpu)) {
if (kvm_vgic_vcpu_pending_irq(vcpu))
@@ -929,6 +935,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
return;
}
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
@@ -356,5 +356,7 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
#endif
@@ -249,6 +249,9 @@ struct vgic_dist {
int nr_spis;
+ /* The GIC maintenance IRQ for nested hypervisors. */
+ u32 maint_irq;
+
/* base addresses in guest physical address space: */
gpa_t vgic_dist_base; /* distributor */
union {
@@ -369,6 +372,7 @@ extern struct static_key_false vgic_v3_cpuif_trap;
int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
Emulating the vGIC means emulating the dreaded Maintenance Interrupt. This is a two-pronged problem: - while running L2, getting an MI translates into an MI injected in the L1 based on the state of the HW. - while running L1, we must accurately reflect the state of the MI line, based on the in-memory state. The MI INTID is added to the distributor, as expected on any virtualisation-capable implementation, and further patches will allow its configuration. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/arm.c | 6 ++++ arch/arm64/kvm/vgic/vgic-init.c | 22 ++++++++++++++ arch/arm64/kvm/vgic/vgic-v3-nested.c | 45 ++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.c | 9 ++++++ arch/arm64/kvm/vgic/vgic.h | 2 ++ include/kvm/arm_vgic.h | 4 +++ 6 files changed, 88 insertions(+)