@@ -9670,53 +9670,70 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
{
+ bool can_inject = !kvm_event_needs_reinjection(vcpu);
int r;
- bool can_inject = true;
- /* try to reinject previous events if any */
+ /*
+ * Process nested events first, as nested VM-Exit supercedes event
+ * re-injection. If there's an event queued for re-injection, it will
+ * be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit.
+ */
+ if (is_guest_mode(vcpu))
+ r = kvm_check_nested_events(vcpu);
+ else
+ r = 0;
- if (vcpu->arch.exception.injected) {
+ /*
+ * Re-inject exceptions and events *especially* if immediate entry+exit
+ * to/from L2 is needed, as any event that has already been injected
+ * into L2 needs to complete its lifecycle before injecting a new event.
+ *
+ * Don't re-inject an NMI or interrupt if there is a pending exception.
+ * This collision arises if an exception occurred while vectoring the
+ * injected event, KVM intercepted said exception, and KVM ultimately
+ * determined the fault belongs to the guest and queues the exception
+ * for injection back into the guest.
+ *
+ * "Injected" interrupts can also collide with pending exceptions if
+ * userspace ignores the "ready for injection" flag and blindly queues
+ * an interrupt. In that case, prioritizing the exception is correct,
+ * as the exception "occurred" before the exit to userspace. Trap-like
+ * exceptions, e.g. most #DBs, have higher priority than interrupts.
+ * And while fault-like exceptions, e.g. #GP and #PF, are the lowest
+ * priority, they're only generated (pended) during instruction
+ * execution, and interrupts are recognized at instruction boundaries.
+ * Thus a pending fault-like exception means the fault occurred on the
+ * *previous* instruction and must be serviced prior to recognizing any
+ * new events in order to fully complete the previous instruction.
+ */
+ if (vcpu->arch.exception.injected)
kvm_inject_exception(vcpu);
- can_inject = false;
- }
+ else if (vcpu->arch.exception.pending)
+ ; /* see above */
+ else if (vcpu->arch.nmi_injected)
+ static_call(kvm_x86_inject_nmi)(vcpu);
+ else if (vcpu->arch.interrupt.injected)
+ static_call(kvm_x86_inject_irq)(vcpu, true);
+
/*
- * Do not inject an NMI or interrupt if there is a pending
- * exception. Exceptions and interrupts are recognized at
- * instruction boundaries, i.e. the start of an instruction.
- * Trap-like exceptions, e.g. #DB, have higher priority than
- * NMIs and interrupts, i.e. traps are recognized before an
- * NMI/interrupt that's pending on the same instruction.
- * Fault-like exceptions, e.g. #GP and #PF, are the lowest
- * priority, but are only generated (pended) during instruction
- * execution, i.e. a pending fault-like exception means the
- * fault occurred on the *previous* instruction and must be
- * serviced prior to recognizing any new events in order to
- * fully complete the previous instruction.
+ * Exceptions that morph to VM-Exits are handled above, and pending
+ * exceptions on top of injected exceptions that do not VM-Exit should
+ * either morph to #DF or, sadly, override the injected exception.
*/
- else if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected) {
- static_call(kvm_x86_inject_nmi)(vcpu);
- can_inject = false;
- } else if (vcpu->arch.interrupt.injected) {
- static_call(kvm_x86_inject_irq)(vcpu, true);
- can_inject = false;
- }
- }
-
WARN_ON_ONCE(vcpu->arch.exception.injected &&
vcpu->arch.exception.pending);
/*
- * Call check_nested_events() even if we reinjected a previous event
- * in order for caller to determine if it should require immediate-exit
- * from L2 to L1 due to pending L1 events which require exit
- * from L2 to L1.
+ * Bail if immediate entry+exit to/from the guest is needed to complete
+ * nested VM-Enter or event re-injection so that a different pending
+ * event can be serviced (or if KVM needs to exit to userspace).
+ *
+ * Otherwise, continue processing events even if VM-Exit occurred. The
+ * VM-Exit will have cleared exceptions that were meant for L2, but
+ * there may now be events that can be injected into L1.
*/
- if (is_guest_mode(vcpu)) {
- r = kvm_check_nested_events(vcpu);
- if (r < 0)
- goto out;
- }
+ if (r < 0)
+ goto out;
/* try to inject new event if pending */
if (vcpu->arch.exception.pending) {