Message ID | 20161103150356.GE7771@potion (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 03/11/2016 16:03, Radim Krčmář wrote: > 2016-11-03 14:30+0100, Paolo Bonzini: >> On 26/10/2016 21:59, Radim Krčmář wrote: >>> 2016-10-14 20:21+0200, Paolo Bonzini: >>>> Calling apic_find_highest_irr results in IRR being scanned twice, >>>> once in vmx_sync_pir_from_irr and once in apic_search_irr. Change >>>> sync_pir_from_irr to do the RVI write and kvm_apic_update_irr to >>>> compute the new RVI on the fly. >>>> >>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>>> --- >>> >>> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> >> >> Nope, this breaks nested VMX exit on external interrupt. For now I'm >> testing only patch 1 and will push that one only to kvm/next. > > Hm, does it also happen with this change? Probably not but I wanted to understand why. :) Paolo > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 4b20f7304b9c..6be110e53e9e 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -941,7 +941,7 @@ struct kvm_x86_ops { > void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); > void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); > - void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); > + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); > int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > int (*get_tdp_level)(void); > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index f8157a36ab09..ffa541d38343 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -4383,9 +4383,9 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) > return; > } > > -static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) > +static int svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) > { > - return; > + return -1; // XXX > } > > static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index f272ccfddc48..2211e1774733 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -8493,17 +8493,15 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) > } > } > > -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) > +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > - int max_irr; > > if (!pi_test_on(&vmx->pi_desc)) > return; > > pi_clear_on(&vmx->pi_desc); > - max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); > - vmx_hwapic_irr_update(vcpu, max_irr); > + return kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); > } > > static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 20d572c18293..e8789f3a9bfd 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6675,7 +6675,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > * notified with kvm_vcpu_kick. > */ > if (vcpu->arch.apicv_active) > - kvm_x86_ops->sync_pir_to_irr(vcpu); > + kvm_x86_ops->hwapic_irr_update(vcpu, kvm_x86_ops->sync_pir_to_irr(vcpu)); > } > > if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2016-11-03 17:00+0100, Paolo Bonzini: > On 03/11/2016 16:03, Radim Krčmář wrote: >> 2016-11-03 14:30+0100, Paolo Bonzini: >>> On 26/10/2016 21:59, Radim Krčmář wrote: >>>> 2016-10-14 20:21+0200, Paolo Bonzini: >>>>> Calling apic_find_highest_irr results in IRR being scanned twice, >>>>> once in vmx_sync_pir_from_irr and once in apic_search_irr. Change >>>>> sync_pir_from_irr to do the RVI write and kvm_apic_update_irr to >>>>> compute the new RVI on the fly. >>>>> >>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>>>> --- >>>> >>>> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> >>> >>> Nope, this breaks nested VMX exit on external interrupt. For now I'm >>> testing only patch 1 and will push that one only to kvm/next. Which hypervisor is being nested? >> Hm, does it also happen with this change? > > Probably not but I wanted to understand why. :) Yeah, I'm also looking forward to knowing the funny coincidence. I think a bug is likely for hypervisors that don't enable PIN_BASED_EXT_INTR_MASK. The bug would trigger when kvm_cpu_has_interrupt() in vmx_check_nested_events() in kvm_arch_vcpu_runnable() queues the interrupt ... but I didn't see how this would have caused a problem. :) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/11/2016 19:07, Radim Krčmář wrote: > 2016-11-03 17:00+0100, Paolo Bonzini: >> On 03/11/2016 16:03, Radim Krčmář wrote: >>> 2016-11-03 14:30+0100, Paolo Bonzini: >>>> On 26/10/2016 21:59, Radim Krčmář wrote: >>>>> 2016-10-14 20:21+0200, Paolo Bonzini: >>>>>> Calling apic_find_highest_irr results in IRR being scanned twice, >>>>>> once in vmx_sync_pir_from_irr and once in apic_search_irr. Change >>>>>> sync_pir_from_irr to do the RVI write and kvm_apic_update_irr to >>>>>> compute the new RVI on the fly. >>>>>> >>>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>>>>> --- >>>>> >>>>> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> >>>> >>>> Nope, this breaks nested VMX exit on external interrupt. For now I'm >>>> testing only patch 1 and will push that one only to kvm/next. > > Which hypervisor is being nested? vmx.flat. :) > I think a bug is likely for hypervisors that don't enable > PIN_BASED_EXT_INTR_MASK. The bug would trigger when > kvm_cpu_has_interrupt() in vmx_check_nested_events() in > kvm_arch_vcpu_runnable() queues the interrupt ... > but I didn't see how this would have caused a problem. :) Ironically, _not_ enabling PIN_BASED_EXT_INTR_MASK and not using HALT activity state is the only case that passes of the four that vmx.flat tests. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2016-11-03 19:18+0100, Paolo Bonzini: > On 03/11/2016 19:07, Radim Krčmář wrote: >> 2016-11-03 17:00+0100, Paolo Bonzini: >>> On 03/11/2016 16:03, Radim Krčmář wrote: >>>> 2016-11-03 14:30+0100, Paolo Bonzini: >>>>> On 26/10/2016 21:59, Radim Krčmář wrote: >>>>>> 2016-10-14 20:21+0200, Paolo Bonzini: >>>>>>> Calling apic_find_highest_irr results in IRR being scanned twice, >>>>>>> once in vmx_sync_pir_from_irr and once in apic_search_irr. Change >>>>>>> sync_pir_from_irr to do the RVI write and kvm_apic_update_irr to >>>>>>> compute the new RVI on the fly. >>>>>>> >>>>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>>>>>> --- >>>>>> >>>>>> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> >>>>> >>>>> Nope, this breaks nested VMX exit on external interrupt. For now I'm >>>>> testing only patch 1 and will push that one only to kvm/next. >> >> Which hypervisor is being nested? > > vmx.flat. :) > >> I think a bug is likely for hypervisors that don't enable >> PIN_BASED_EXT_INTR_MASK. The bug would trigger when >> kvm_cpu_has_interrupt() in vmx_check_nested_events() in >> kvm_arch_vcpu_runnable() queues the interrupt ... >> but I didn't see how this would have caused a problem. :) > > Ironically, _not_ enabling PIN_BASED_EXT_INTR_MASK and not using HALT > activity state is the only case that passes of the four that vmx.flat tests. Heh, the behavior is nice PASS: direct interrupt + hlt FAIL: intercepted interrupt + hlt FAIL: direct interrupt + activity state hlt FAIL: intercepted interrupt + activity state hlt -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Oh, I got distracted and sent without finishing ...] 2016-11-03 19:29+0100, Radim Krčmář: > 2016-11-03 19:18+0100, Paolo Bonzini: >> On 03/11/2016 19:07, Radim Krčmář wrote: >>> I think a bug is likely for hypervisors that don't enable >>> PIN_BASED_EXT_INTR_MASK. The bug would trigger when >>> kvm_cpu_has_interrupt() in vmx_check_nested_events() in >>> kvm_arch_vcpu_runnable() queues the interrupt ... >>> but I didn't see how this would have caused a problem. :) >> >> Ironically, _not_ enabling PIN_BASED_EXT_INTR_MASK and not using HALT >> activity state is the only case that passes of the four that vmx.flat tests. > > Heh, the behavior is nice > > PASS: direct interrupt + hlt > FAIL: intercepted interrupt + hlt > FAIL: direct interrupt + activity state hlt > FAIL: intercepted interrupt + activity state hlt but the 3rd one is racy, so I sometimes also get PASS: direct interrupt + hlt FAIL: intercepted interrupt + hlt PASS: direct interrupt + activity state hlt FAIL: intercepted interrupt + activity state hlt 1st and 3rd have disabled extint and 2nd and 4th enabled ... but that would mean that we a bug in a path that gets called in both cases, so calling vmx_hwapic_irr_update() isn't a problem ... and suddenly the bug becomes obvious: > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + int max_irr; > + > + if (!pi_test_on(&vmx->pi_desc)) We don't call vmx_hwapic_irr_update() when returning early. > + return; > + > + pi_clear_on(&vmx->pi_desc); > + max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); > + vmx_hwapic_irr_update(vcpu, max_irr); > +} > + > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > @@ -6611,8 +6611,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > * virtual interrupt delivery. > */ > if (vcpu->arch.apicv_active) > - kvm_x86_ops->hwapic_irr_update(vcpu, > - kvm_lapic_find_highest_irr(vcpu)); > + kvm_x86_ops->sync_pir_to_irr(vcpu); > } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/11/2016 21:16, Radim Krčmář wrote: > > + if (!pi_test_on(&vmx->pi_desc)) > > We don't call vmx_hwapic_irr_update() when returning early. This might be a good start, but it's on purpose: IRR is not changing and the invariant _should_ be that RVI=highest-bit(IRR): - IRR cleared by processor: see SDM 29.2.2 Virtual-interrupt delivery - IRR set by processor: see SDM 29.6 Posted-interrupt processing - IRR set by KVM: ON=1 so it doesn't exit here - IRR cleared by KVM: might indeed be buggy here, but the next patch does add a + kvm_x86_ops->hwapic_irr_update(vcpu, + apic_find_highest_irr(apic)); to apic_clear_irr, which doesn't fix the bug (and doesn't fix it also if backported here). So we're missing a place where IRR has changed but RVI is not being updated. It should be related to vmx_check_nested_events and kvm_cpu_has_interrupt as you said, but I cannot really see it. Paolo >> > + return; >> > + >> > + pi_clear_on(&vmx->pi_desc); >> > + max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); >> > + vmx_hwapic_irr_update(vcpu, max_irr); >> > +} -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4b20f7304b9c..6be110e53e9e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -941,7 +941,7 @@ struct kvm_x86_ops { void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); - void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8157a36ab09..ffa541d38343 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4383,9 +4383,9 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +static int svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) { - return; + return -1; // XXX } static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f272ccfddc48..2211e1774733 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8493,17 +8493,15 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int max_irr; if (!pi_test_on(&vmx->pi_desc)) return; pi_clear_on(&vmx->pi_desc); - max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); - vmx_hwapic_irr_update(vcpu, max_irr); + return kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 20d572c18293..e8789f3a9bfd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6675,7 +6675,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * notified with kvm_vcpu_kick. */ if (vcpu->arch.apicv_active) - kvm_x86_ops->sync_pir_to_irr(vcpu); + kvm_x86_ops->hwapic_irr_update(vcpu, kvm_x86_ops->sync_pir_to_irr(vcpu)); } if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests