@@ -1179,6 +1179,7 @@ struct kvm_x86_ops {
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
int (*check_nested_events)(struct kvm_vcpu *vcpu);
+ bool (*nested_hv_timer_pending)(struct kvm_vcpu *vcpu);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -3687,6 +3687,12 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
vcpu->arch.exception.payload);
}
+static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
+{
+ return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ to_vmx(vcpu)->nested.preemption_timer_expired;
+}
+
static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3742,8 +3748,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
- if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
- vmx->nested.preemption_timer_expired) {
+ if (nested_vmx_preemption_timer_pending(vcpu)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
@@ -6443,6 +6448,7 @@ __init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
ops->check_nested_events = vmx_check_nested_events;
+ ops->nested_hv_timer_pending = nested_vmx_preemption_timer_pending;
ops->get_nested_state = vmx_get_nested_state;
ops->set_nested_state = vmx_set_nested_state;
ops->get_vmcs12_pages = nested_get_vmcs12_pages;
@@ -8324,6 +8324,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops.enable_nmi_window(vcpu);
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
kvm_x86_ops.enable_irq_window(vcpu);
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_hv_timer_pending &&
+ kvm_x86_ops.nested_hv_timer_pending(vcpu))
+ req_immediate_exit = true;
WARN_ON(vcpu->arch.exception.pending);
}
Add a kvm_x86_ops hook to detect a nested pending "hypervisor timer" and use it to effectively open a window for servicing the expired timer. Like pending SMIs on VMX, opening a window simply means requesting an immediate exit. This fixes a bug where an expired VMX preemption timer (for L2) will be delayed and/or lost if a pending exception is injected into L2. The pending exception is rightly prioritized by vmx_check_nested_events() and injected into L2, with the preemption timer left pending. Because no window opened, L2 is free to run uninterrupted. Fixes: f4124500c2c13 ("KVM: nVMX: Fully emulate preemption timer") Reported-by: Jim Mattson <jmattson@google.com> Cc: Oliver Upton <oupton@google.com> Cc: Peter Shier <pshier@google.com> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/nested.c | 10 ++++++++-- arch/x86/kvm/x86.c | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-)