Message ID | 5133B0D7.8070203@web.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Il 03/03/2013 21:21, Jan Kiszka ha scritto: > From: Jan Kiszka <jan.kiszka@siemens.com> > > A VCPU sending INIT or SIPI to some other VCPU races for setting the > remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED > was overwritten by kvm_emulate_halt and, thus, got lost. > > Fix this by raising requests on the sender side that will then be > handled synchronously over the target VCPU context. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > > Turned out to be simpler than expected. I'm no longer able to reproduce > the race I saw before. > > arch/x86/kvm/lapic.c | 9 ++++----- > arch/x86/kvm/x86.c | 16 +++++++++++++++- > include/linux/kvm_host.h | 2 ++ > 3 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 02b51dd..be1e37a 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_INIT: > if (!trig_mode || level) { > result = 1; > - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > - kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_make_request(KVM_REQ_INIT, vcpu); > kvm_vcpu_kick(vcpu); > } else { > apic_debug("Ignoring de-assert INIT to vcpu %d\n", > @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_STARTUP: > apic_debug("SIPI to vcpu %d vector 0x%02x\n", > vcpu->vcpu_id, vector); > - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || > + test_bit(KVM_REQ_INIT, &vcpu->requests)) { > result = 1; > vcpu->arch.sipi_vector = vector; > - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > - kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_make_request(KVM_REQ_SIPI, vcpu); > kvm_vcpu_kick(vcpu); > } > break; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index d0cf737..8c8843c 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) > kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); > } > > +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) > +{ > + if (kvm_check_request(KVM_REQ_INIT, vcpu)) > + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && > + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Perhaps you can call kvm_check_init_and_sipi from __vcpu_run, before the call to kvm_vcpu_block (and move the reset from __vcpu_run to kvm_check_init_and_sipi too)? Then you do not even need to touch kvm_arch_vcpu_runnable. > + return true; > + } > + return false; > +} > + > static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > { > int r; > @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > bool req_immediate_exit = 0; > > if (vcpu->requests) { > + kvm_check_init_and_sipi(vcpu); Does this need to return 1 if kvm_check_init_and_sipi returns 1? Otherwise the guest is entered in INIT state. I think. Paolo > if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) > kvm_mmu_unload(vcpu); > if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) > @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, > > int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) > { > + if (kvm_check_init_and_sipi(vcpu)) > + return 1; > return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && > !vcpu->arch.apf.halted) > || !list_empty_careful(&vcpu->async_pf.done) > - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED > || atomic_read(&vcpu->arch.nmi_queued) || > (kvm_arch_interrupt_allowed(vcpu) && > kvm_cpu_has_interrupt(vcpu)); > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 722cae7..1a191c9 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page) > #define KVM_REQ_MCLOCK_INPROGRESS 20 > #define KVM_REQ_EPR_EXIT 21 > #define KVM_REQ_EOIBITMAP 22 > +#define KVM_REQ_INIT 23 > +#define KVM_REQ_SIPI 24 > > #define KVM_USERSPACE_IRQ_SOURCE_ID 0 > #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-03-04 15:28, Paolo Bonzini wrote: > Il 03/03/2013 21:21, Jan Kiszka ha scritto: >> From: Jan Kiszka <jan.kiszka@siemens.com> >> >> A VCPU sending INIT or SIPI to some other VCPU races for setting the >> remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED >> was overwritten by kvm_emulate_halt and, thus, got lost. >> >> Fix this by raising requests on the sender side that will then be >> handled synchronously over the target VCPU context. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> >> Turned out to be simpler than expected. I'm no longer able to reproduce >> the race I saw before. >> >> arch/x86/kvm/lapic.c | 9 ++++----- >> arch/x86/kvm/x86.c | 16 +++++++++++++++- >> include/linux/kvm_host.h | 2 ++ >> 3 files changed, 21 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> index 02b51dd..be1e37a 100644 >> --- a/arch/x86/kvm/lapic.c >> +++ b/arch/x86/kvm/lapic.c >> @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_INIT: >> if (!trig_mode || level) { >> result = 1; >> - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_INIT, vcpu); >> kvm_vcpu_kick(vcpu); >> } else { >> apic_debug("Ignoring de-assert INIT to vcpu %d\n", >> @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_STARTUP: >> apic_debug("SIPI to vcpu %d vector 0x%02x\n", >> vcpu->vcpu_id, vector); >> - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || >> + test_bit(KVM_REQ_INIT, &vcpu->requests)) { >> result = 1; >> vcpu->arch.sipi_vector = vector; >> - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_SIPI, vcpu); >> kvm_vcpu_kick(vcpu); >> } >> break; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index d0cf737..8c8843c 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) >> kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); >> } >> >> +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) >> +{ >> + if (kvm_check_request(KVM_REQ_INIT, vcpu)) >> + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && >> + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > > Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Perhaps you can > call kvm_check_init_and_sipi from __vcpu_run, before the call to > kvm_vcpu_block (and move the reset from __vcpu_run to > kvm_check_init_and_sipi too)? Then you do not even need to touch > kvm_arch_vcpu_runnable. Haven't thought about this in details yet as I first wanted to fix within the existing infrastructure. But maybe the change below requires more refactoring anyway. Let's see. > >> + return true; >> + } >> + return false; >> +} >> + >> static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> { >> int r; >> @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> bool req_immediate_exit = 0; >> >> if (vcpu->requests) { >> + kvm_check_init_and_sipi(vcpu); > > Does this need to return 1 if kvm_check_init_and_sipi returns 1? > Otherwise the guest is entered in INIT state. I think. Hmm, true... Need to refactor things a bit more as kvm_check_init_and_sipi is designed to return true only for wait-on-sipi->runnable transition. Thanks, Jan
On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote: > From: Jan Kiszka <jan.kiszka@siemens.com> > > A VCPU sending INIT or SIPI to some other VCPU races for setting the > remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED > was overwritten by kvm_emulate_halt and, thus, got lost. > > Fix this by raising requests on the sender side that will then be > handled synchronously over the target VCPU context. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > > Turned out to be simpler than expected. I'm no longer able to reproduce > the race I saw before. > > arch/x86/kvm/lapic.c | 9 ++++----- > arch/x86/kvm/x86.c | 16 +++++++++++++++- > include/linux/kvm_host.h | 2 ++ > 3 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 02b51dd..be1e37a 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_INIT: > if (!trig_mode || level) { > result = 1; > - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > - kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_make_request(KVM_REQ_INIT, vcpu); > kvm_vcpu_kick(vcpu); > } else { > apic_debug("Ignoring de-assert INIT to vcpu %d\n", > @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_STARTUP: > apic_debug("SIPI to vcpu %d vector 0x%02x\n", > vcpu->vcpu_id, vector); > - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || > + test_bit(KVM_REQ_INIT, &vcpu->requests)) { > result = 1; > vcpu->arch.sipi_vector = vector; > - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > - kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_make_request(KVM_REQ_SIPI, vcpu); > kvm_vcpu_kick(vcpu); > } > break; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index d0cf737..8c8843c 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) > kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); > } > > +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) > +{ > + if (kvm_check_request(KVM_REQ_INIT, vcpu)) > + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && > + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > + return true; > + } > + return false; > +} > + > static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > { > int r; > @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > bool req_immediate_exit = 0; > > if (vcpu->requests) { > + kvm_check_init_and_sipi(vcpu); > if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) > kvm_mmu_unload(vcpu); > if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) > @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, > > int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) > { > + if (kvm_check_init_and_sipi(vcpu)) > + return 1; > return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && > !vcpu->arch.apf.halted) > || !list_empty_careful(&vcpu->async_pf.done) > - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED > || atomic_read(&vcpu->arch.nmi_queued) || > (kvm_arch_interrupt_allowed(vcpu) && > kvm_cpu_has_interrupt(vcpu)); This makes two subsequent calls to kvm_arch_vcpu_runnable() return different values if SIPI is pending. While it may not cause problem to current code (I haven't thought it through) with such semantics you gonna have a bad time. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-03-04 19:08, Gleb Natapov wrote: > On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote: >> From: Jan Kiszka <jan.kiszka@siemens.com> >> >> A VCPU sending INIT or SIPI to some other VCPU races for setting the >> remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED >> was overwritten by kvm_emulate_halt and, thus, got lost. >> >> Fix this by raising requests on the sender side that will then be >> handled synchronously over the target VCPU context. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> >> Turned out to be simpler than expected. I'm no longer able to reproduce >> the race I saw before. >> >> arch/x86/kvm/lapic.c | 9 ++++----- >> arch/x86/kvm/x86.c | 16 +++++++++++++++- >> include/linux/kvm_host.h | 2 ++ >> 3 files changed, 21 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> index 02b51dd..be1e37a 100644 >> --- a/arch/x86/kvm/lapic.c >> +++ b/arch/x86/kvm/lapic.c >> @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_INIT: >> if (!trig_mode || level) { >> result = 1; >> - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_INIT, vcpu); >> kvm_vcpu_kick(vcpu); >> } else { >> apic_debug("Ignoring de-assert INIT to vcpu %d\n", >> @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_STARTUP: >> apic_debug("SIPI to vcpu %d vector 0x%02x\n", >> vcpu->vcpu_id, vector); >> - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || >> + test_bit(KVM_REQ_INIT, &vcpu->requests)) { >> result = 1; >> vcpu->arch.sipi_vector = vector; >> - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_SIPI, vcpu); >> kvm_vcpu_kick(vcpu); >> } >> break; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index d0cf737..8c8843c 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) >> kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); >> } >> >> +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) >> +{ >> + if (kvm_check_request(KVM_REQ_INIT, vcpu)) >> + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && >> + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; >> + return true; >> + } >> + return false; >> +} >> + >> static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> { >> int r; >> @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> bool req_immediate_exit = 0; >> >> if (vcpu->requests) { >> + kvm_check_init_and_sipi(vcpu); >> if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) >> kvm_mmu_unload(vcpu); >> if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) >> @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, >> >> int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) >> { >> + if (kvm_check_init_and_sipi(vcpu)) >> + return 1; >> return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && >> !vcpu->arch.apf.halted) >> || !list_empty_careful(&vcpu->async_pf.done) >> - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED >> || atomic_read(&vcpu->arch.nmi_queued) || >> (kvm_arch_interrupt_allowed(vcpu) && >> kvm_cpu_has_interrupt(vcpu)); > This makes two subsequent calls to kvm_arch_vcpu_runnable() return > different values if SIPI is pending. While it may not cause problem to > current code (I haven't thought it through) with such semantics you > gonna have a bad time. If I manage to follow Paolo's suggestion to eliminate the SIPI_RECEIVED state and all the staged logic around it, that might change. Will be more invasive but likely cleaner in its result. Jan
On 2013-03-04 15:28, Paolo Bonzini wrote: > Il 03/03/2013 21:21, Jan Kiszka ha scritto: >> From: Jan Kiszka <jan.kiszka@siemens.com> >> >> A VCPU sending INIT or SIPI to some other VCPU races for setting the >> remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED >> was overwritten by kvm_emulate_halt and, thus, got lost. >> >> Fix this by raising requests on the sender side that will then be >> handled synchronously over the target VCPU context. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> >> Turned out to be simpler than expected. I'm no longer able to reproduce >> the race I saw before. >> >> arch/x86/kvm/lapic.c | 9 ++++----- >> arch/x86/kvm/x86.c | 16 +++++++++++++++- >> include/linux/kvm_host.h | 2 ++ >> 3 files changed, 21 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> index 02b51dd..be1e37a 100644 >> --- a/arch/x86/kvm/lapic.c >> +++ b/arch/x86/kvm/lapic.c >> @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_INIT: >> if (!trig_mode || level) { >> result = 1; >> - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_INIT, vcpu); >> kvm_vcpu_kick(vcpu); >> } else { >> apic_debug("Ignoring de-assert INIT to vcpu %d\n", >> @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, >> case APIC_DM_STARTUP: >> apic_debug("SIPI to vcpu %d vector 0x%02x\n", >> vcpu->vcpu_id, vector); >> - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || >> + test_bit(KVM_REQ_INIT, &vcpu->requests)) { >> result = 1; >> vcpu->arch.sipi_vector = vector; >> - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_make_request(KVM_REQ_SIPI, vcpu); >> kvm_vcpu_kick(vcpu); >> } >> break; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index d0cf737..8c8843c 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) >> kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); >> } >> >> +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) >> +{ >> + if (kvm_check_request(KVM_REQ_INIT, vcpu)) >> + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; >> + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && >> + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { >> + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > > Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Unfortunately, we cannot kill it as it was user-visible: When a VCPU receives KVM_MP_STATE_SIPI_RECEIVED, it leaves __vcpu_run with -EINTR and, thus, KVM_RUN. We actually return to userspace, allowing it to see this mp_state and also migrate the guest in this state. I could avoid this userspace exit (not sure what it is good for) but we will have to keep the logic to accept and convert the state into KVM_MP_STATE_RUNNABLE. So there is not much to simplify here, I'm afraid. Jan
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug("Ignoring de-assert INIT to vcpu %d\n", @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || + test_bit(KVM_REQ_INIT, &vcpu->requests)) { result = 1; vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_INIT, vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + if (kvm_check_request(KVM_REQ_SIPI, vcpu) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; + return true; + } + return false; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu->requests) { + kvm_check_init_and_sipi(vcpu); if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + if (kvm_check_init_and_sipi(vcpu)) + return 1; return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 722cae7..1a191c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MCLOCK_INPROGRESS 20 #define KVM_REQ_EPR_EXIT 21 #define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_INIT 23 +#define KVM_REQ_SIPI 24 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1