@@ -3175,6 +3175,15 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
offset_in_page(vmcs12->posted_intr_desc_addr));
vmcs_write64(POSTED_INTR_DESC_ADDR,
pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
+ } else {
+ /*
+ * Defer the KVM_INTERNAL_EXIT until KVM tries to
+ * access the contents of the VMCS12 posted interrupt
+ * descriptor. (Note that KVM may do this when it
+ * should not, per the architectural specification.)
+ */
+ vmx->nested.pi_desc = NULL;
+ pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
}
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
@@ -3689,10 +3698,14 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
void *vapic_page;
u16 status;
- if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
+ if (!vmx->nested.pi_pending)
return 0;
+ if (!vmx->nested.pi_desc)
+ goto mmio_needed;
+
vmx->nested.pi_pending = false;
+
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
return 0;
Don't allow posted interrupts to modify a stale posted interrupt
descriptor (including the initial value of 0).

Empirical tests on real hardware reveal that a posted interrupt
descriptor referencing an unbacked address has PCI bus error semantics
(reads as all 1's; writes are ignored). However, KVM can't distinguish
unbacked addresses from device-backed (MMIO) addresses, so it should
really ask userspace for an MMIO completion. That's overly
complicated, so just punt with KVM_INTERNAL_ERROR.

Don't return the error until the posted interrupt descriptor is
actually accessed. We don't want to break the existing kvm-unit-tests
that assume they can launch an L2 VM with a posted interrupt
descriptor that references MMIO space in L1.

Fixes: 6beb7bd52e48 ("kvm: nVMX: Refactor nested_get_vmcs12_pages()")
Signed-off-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kvm/vmx/nested.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)