Message ID | 20200529153934.11694-18-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: nSVM: event fixes and migration support | expand |
On 5/29/20 8:39 AM, Paolo Bonzini wrote: > The control state changes on every L2->L0 vmexit, and we will have to > serialize it in the nested state. So keep it up to date in svm->nested.ctl > and just copy them back to the nested VMCB in nested_svm_vmexit. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++----------------- > arch/x86/kvm/svm/svm.c | 5 +++- > arch/x86/kvm/svm/svm.h | 1 + > 3 files changed, 38 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > index 1e5f460b5540..921466eba556 100644 > --- a/arch/x86/kvm/svm/nested.c > +++ b/arch/x86/kvm/svm/nested.c > @@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm, > svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; > } > > +/* > + * Synchronize fields that are written by the processor, so that > + * they can be copied back into the nested_vmcb. > + */ > +void sync_nested_vmcb_control(struct vcpu_svm *svm) > +{ > + u32 mask; > + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; > + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; > + > + /* Only a few fields of int_ctl are written by the processor. */ > + mask = V_IRQ_MASK | V_TPR_MASK; > + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && > + is_intercept(svm, SVM_EXIT_VINTR)) { > + /* > + * In order to request an interrupt window, L0 is usurping > + * svm->vmcb->control.int_ctl and possibly setting V_IRQ > + * even if it was clear in L1's VMCB. Restoring it would be > + * wrong. However, in this case V_IRQ will remain true until > + * interrupt_window_interception calls svm_clear_vintr and > + * restores int_ctl. We can just leave it aside. 
> + */ > + mask &= ~V_IRQ_MASK; > + } > + svm->nested.ctl.int_ctl &= ~mask; > + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; > +} > + > static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) > { > /* Load the nested guest state */ > @@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) > /* Exit Guest-Mode */ > leave_guest_mode(&svm->vcpu); > svm->nested.vmcb = 0; > + WARN_ON_ONCE(svm->nested.nested_run_pending); > > /* in case we halted in L2 */ > svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; > @@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) > nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; > nested_vmcb->save.cpl = vmcb->save.cpl; > > - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; > - nested_vmcb->control.int_vector = vmcb->control.int_vector; While it's not related to this patch, I am wondering if we need the following line in enter_svm_guest_mode(): svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; If int_vector is used only to trigger a #VMEXIT after we have decided to inject a virtual interrupt, we probably don't the vector value from the nested vmcb. > nested_vmcb->control.int_state = vmcb->control.int_state; > nested_vmcb->control.exit_code = vmcb->control.exit_code; > nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; > @@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm) > if (svm->nrips_enabled) > nested_vmcb->control.next_rip = vmcb->control.next_rip; > > - /* > - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have > - * to make sure that we do not lose injected events. So check event_inj > - * here and copy it to exit_int_info if it is valid. > - * Exit_int_info and event_inj can't be both valid because the case > - * below only happens on a VMRUN instruction intercept which has > - * no valid exit_int_info set. 
> - */ > - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { > - struct vmcb_control_area *nc = &nested_vmcb->control; > - > - nc->exit_int_info = vmcb->control.event_inj; > - nc->exit_int_info_err = vmcb->control.event_inj_err; > - } > - > - nested_vmcb->control.tlb_ctl = 0; > - nested_vmcb->control.event_inj = 0; > - nested_vmcb->control.event_inj_err = 0; > + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; > + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; > + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; > + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; > > nested_vmcb->control.pause_filter_count = > svm->vmcb->control.pause_filter_count; > nested_vmcb->control.pause_filter_thresh = > svm->vmcb->control.pause_filter_thresh; > > - /* We always set V_INTR_MASKING and remember the old value in hflags */ > - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) > - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; > - > /* Restore the original control entries */ > copy_vmcb_control_area(&vmcb->control, &hsave->control); > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 4122ba86bac2..b710e62ace16 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -3427,7 +3427,10 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) > sync_cr8_to_lapic(vcpu); > > svm->next_rip = 0; > - svm->nested.nested_run_pending = 0; > + if (is_guest_mode(&svm->vcpu)) { > + sync_nested_vmcb_control(svm); > + svm->nested.nested_run_pending = 0; I don't see any existence of nested_run_pending for nSVM either in Linus tree or KVM tree. Is there a patch that has this added but not pushed yet ? 
> + } > > svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; > > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index dd5418f20256..7e79f0af1204 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -394,6 +394,7 @@ int nested_svm_check_permissions(struct vcpu_svm *svm); > int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, > bool has_error_code, u32 error_code); > int nested_svm_exit_special(struct vcpu_svm *svm); > +void sync_nested_vmcb_control(struct vcpu_svm *svm); > > extern struct kvm_x86_nested_ops svm_nested_ops; >
On 30/05/20 04:06, Krish Sadhukhan wrote: >> >> - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; >> - nested_vmcb->control.int_vector = vmcb->control.int_vector; > > > While it's not related to this patch, I am wondering if we need the > following line in enter_svm_guest_mode(): > > svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; > > If int_vector is used only to trigger a #VMEXIT after we have decided to > inject a virtual interrupt, we probably don't need the vector value from the > nested vmcb. KVM does not use int_vector, but other hypervisors sometimes trigger virtual interrupts using int_ctl+int_vector instead of eventinj. (Unlike KVM they don't intercept EXIT_VINTR). Hyper-V operates like that. Paolo
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1e5f460b5540..921466eba556 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm, svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; } +/* + * Synchronize fields that are written by the processor, so that + * they can be copied back into the nested_vmcb. + */ +void sync_nested_vmcb_control(struct vcpu_svm *svm) +{ + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && + is_intercept(svm, SVM_EXIT_VINTR)) { + /* + * In order to request an interrupt window, L0 is usurping + * svm->vmcb->control.int_ctl and possibly setting V_IRQ + * even if it was clear in L1's VMCB. Restoring it would be + * wrong. However, in this case V_IRQ will remain true until + * interrupt_window_interception calls svm_clear_vintr and + * restores int_ctl. We can just leave it aside. 
+ */ + mask &= ~V_IRQ_MASK; + } + svm->nested.ctl.int_ctl &= ~mask; + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; +} + static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) { /* Load the nested guest state */ @@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(&svm->vcpu); svm->nested.vmcb = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; - nested_vmcb->control.int_vector = vmcb->control.int_vector; nested_vmcb->control.int_state = vmcb->control.int_state; nested_vmcb->control.exit_code = vmcb->control.exit_code; nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; @@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (svm->nrips_enabled) nested_vmcb->control.next_rip = vmcb->control.next_rip; - /* - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have - * to make sure that we do not lose injected events. So check event_inj - * here and copy it to exit_int_info if it is valid. - * Exit_int_info and event_inj can't be both valid because the case - * below only happens on a VMRUN instruction intercept which has - * no valid exit_int_info set. 
- */ - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { - struct vmcb_control_area *nc = &nested_vmcb->control; - - nc->exit_int_info = vmcb->control.event_inj; - nc->exit_int_info_err = vmcb->control.event_inj_err; - } - - nested_vmcb->control.tlb_ctl = 0; - nested_vmcb->control.event_inj = 0; - nested_vmcb->control.event_inj_err = 0; + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; nested_vmcb->control.pause_filter_count = svm->vmcb->control.pause_filter_count; nested_vmcb->control.pause_filter_thresh = svm->vmcb->control.pause_filter_thresh; - /* We always set V_INTR_MASKING and remember the old value in hflags */ - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; - /* Restore the original control entries */ copy_vmcb_control_area(&vmcb->control, &hsave->control); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4122ba86bac2..b710e62ace16 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3427,7 +3427,10 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) sync_cr8_to_lapic(vcpu); svm->next_rip = 0; - svm->nested.nested_run_pending = 0; + if (is_guest_mode(&svm->vcpu)) { + sync_nested_vmcb_control(svm); + svm->nested.nested_run_pending = 0; + } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index dd5418f20256..7e79f0af1204 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -394,6 +394,7 @@ int nested_svm_check_permissions(struct vcpu_svm *svm); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); +void sync_nested_vmcb_control(struct vcpu_svm *svm); extern struct kvm_x86_nested_ops 
svm_nested_ops;
The control state changes on every L2->L0 vmexit, and we will have to serialize it in the nested state. So keep it up to date in svm->nested.ctl and just copy them back to the nested VMCB in nested_svm_vmexit. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++----------------- arch/x86/kvm/svm/svm.c | 5 +++- arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 38 insertions(+), 25 deletions(-)