[17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit
diff mbox series

Message ID 20200529153934.11694-18-pbonzini@redhat.com
State New
Headers show
Series
  • KVM: nSVM: event fixes and migration support
Related show

Commit Message

Paolo Bonzini May 29, 2020, 3:39 p.m. UTC
The control state changes on every L2->L0 vmexit, and we will have to
serialize it in the nested state.  So keep it up to date in svm->nested.ctl
and just copy them back to the nested VMCB in nested_svm_vmexit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++-----------------
 arch/x86/kvm/svm/svm.c    |  5 +++-
 arch/x86/kvm/svm/svm.h    |  1 +
 3 files changed, 38 insertions(+), 25 deletions(-)

Comments

Krish Sadhukhan May 30, 2020, 2:06 a.m. UTC | #1
On 5/29/20 8:39 AM, Paolo Bonzini wrote:
> The control state changes on every L2->L0 vmexit, and we will have to
> serialize it in the nested state.  So keep it up to date in svm->nested.ctl
> and just copy them back to the nested VMCB in nested_svm_vmexit.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++-----------------
>   arch/x86/kvm/svm/svm.c    |  5 +++-
>   arch/x86/kvm/svm/svm.h    |  1 +
>   3 files changed, 38 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 1e5f460b5540..921466eba556 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
>   	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
>   }
>   
> +/*
> + * Synchronize fields that are written by the processor, so that
> + * they can be copied back into the nested_vmcb.
> + */
> +void sync_nested_vmcb_control(struct vcpu_svm *svm)
> +{
> +	u32 mask;
> +	svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
> +	svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
> +
> +	/* Only a few fields of int_ctl are written by the processor.  */
> +	mask = V_IRQ_MASK | V_TPR_MASK;
> +	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
> +	    is_intercept(svm, SVM_EXIT_VINTR)) {
> +		/*
> +		 * In order to request an interrupt window, L0 is usurping
> +		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
> +		 * even if it was clear in L1's VMCB.  Restoring it would be
> +		 * wrong.  However, in this case V_IRQ will remain true until
> +		 * interrupt_window_interception calls svm_clear_vintr and
> +		 * restores int_ctl.  We can just leave it aside.
> +		 */
> +		mask &= ~V_IRQ_MASK;
> +	}
> +	svm->nested.ctl.int_ctl        &= ~mask;
> +	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
> +}
> +
>   static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
>   {
>   	/* Load the nested guest state */
> @@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>   	/* Exit Guest-Mode */
>   	leave_guest_mode(&svm->vcpu);
>   	svm->nested.vmcb = 0;
> +	WARN_ON_ONCE(svm->nested.nested_run_pending);
>   
>   	/* in case we halted in L2 */
>   	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
> @@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>   	nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
>   	nested_vmcb->save.cpl    = vmcb->save.cpl;
>   
> -	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
> -	nested_vmcb->control.int_vector        = vmcb->control.int_vector;


While it's not related to this patch, I am wondering if we need the 
following line in enter_svm_guest_mode():

     svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;


If int_vector is used only to trigger a #VMEXIT after we have decided to 
inject a virtual interrupt, we probably don't the vector value from the 
nested vmcb.

>   	nested_vmcb->control.int_state         = vmcb->control.int_state;
>   	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
>   	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
> @@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>   	if (svm->nrips_enabled)
>   		nested_vmcb->control.next_rip  = vmcb->control.next_rip;
>   
> -	/*
> -	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
> -	 * to make sure that we do not lose injected events. So check event_inj
> -	 * here and copy it to exit_int_info if it is valid.
> -	 * Exit_int_info and event_inj can't be both valid because the case
> -	 * below only happens on a VMRUN instruction intercept which has
> -	 * no valid exit_int_info set.
> -	 */
> -	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
> -		struct vmcb_control_area *nc = &nested_vmcb->control;
> -
> -		nc->exit_int_info     = vmcb->control.event_inj;
> -		nc->exit_int_info_err = vmcb->control.event_inj_err;
> -	}
> -
> -	nested_vmcb->control.tlb_ctl           = 0;
> -	nested_vmcb->control.event_inj         = 0;
> -	nested_vmcb->control.event_inj_err     = 0;
> +	nested_vmcb->control.int_ctl           = svm->nested.ctl.int_ctl;
> +	nested_vmcb->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
> +	nested_vmcb->control.event_inj         = svm->nested.ctl.event_inj;
> +	nested_vmcb->control.event_inj_err     = svm->nested.ctl.event_inj_err;
>   
>   	nested_vmcb->control.pause_filter_count =
>   		svm->vmcb->control.pause_filter_count;
>   	nested_vmcb->control.pause_filter_thresh =
>   		svm->vmcb->control.pause_filter_thresh;
>   
> -	/* We always set V_INTR_MASKING and remember the old value in hflags */
> -	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
> -		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
> -
>   	/* Restore the original control entries */
>   	copy_vmcb_control_area(&vmcb->control, &hsave->control);
>   
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 4122ba86bac2..b710e62ace16 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3427,7 +3427,10 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>   	sync_cr8_to_lapic(vcpu);
>   
>   	svm->next_rip = 0;
> -	svm->nested.nested_run_pending = 0;
> +	if (is_guest_mode(&svm->vcpu)) {
> +		sync_nested_vmcb_control(svm);
> +		svm->nested.nested_run_pending = 0;


I don't see any existence of nested_run_pending for nSVM either in Linus 
tree or KVM tree. Is there a  patch that has this added but not pushed yet ?

> +	}
>   
>   	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
>   
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index dd5418f20256..7e79f0af1204 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -394,6 +394,7 @@ int nested_svm_check_permissions(struct vcpu_svm *svm);
>   int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
>   			       bool has_error_code, u32 error_code);
>   int nested_svm_exit_special(struct vcpu_svm *svm);
> +void sync_nested_vmcb_control(struct vcpu_svm *svm);
>   
>   extern struct kvm_x86_nested_ops svm_nested_ops;
>
Paolo Bonzini May 30, 2020, 5:10 a.m. UTC | #2
On 30/05/20 04:06, Krish Sadhukhan wrote:
>>
>>   -    nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
>> -    nested_vmcb->control.int_vector        = vmcb->control.int_vector;
> 
> 
> While it's not related to this patch, I am wondering if we need the
> following line in enter_svm_guest_mode():
> 
>     svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
> 
> If int_vector is used only to trigger a #VMEXIT after we have decided to
> inject a virtual interrupt, we probably don't the vector value from the
> nested vmcb.

KVM does not use int_vector, but other hypervisor sometimes trigger
virtual interrupts using int_ctl+int_vector instead of eventinj.
(Unlike KVM they don't intercept EXIT_VINTR).  Hyper-V operates like that.

Paolo

Patch
diff mbox series

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1e5f460b5540..921466eba556 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -234,6 +234,34 @@  static void load_nested_vmcb_control(struct vcpu_svm *svm,
 	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
 }
 
+/*
+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+	u32 mask;
+	svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
+	svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
+
+	/* Only a few fields of int_ctl are written by the processor.  */
+	mask = V_IRQ_MASK | V_TPR_MASK;
+	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+	    is_intercept(svm, SVM_EXIT_VINTR)) {
+		/*
+		 * In order to request an interrupt window, L0 is usurping
+		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
+		 * even if it was clear in L1's VMCB.  Restoring it would be
+		 * wrong.  However, in this case V_IRQ will remain true until
+		 * interrupt_window_interception calls svm_clear_vintr and
+		 * restores int_ctl.  We can just leave it aside.
+		 */
+		mask &= ~V_IRQ_MASK;
+	}
+	svm->nested.ctl.int_ctl        &= ~mask;
+	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
+}
+
 static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
 {
 	/* Load the nested guest state */
@@ -471,6 +499,7 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Exit Guest-Mode */
 	leave_guest_mode(&svm->vcpu);
 	svm->nested.vmcb = 0;
+	WARN_ON_ONCE(svm->nested.nested_run_pending);
 
 	/* in case we halted in L2 */
 	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -497,8 +526,6 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
 	nested_vmcb->save.cpl    = vmcb->save.cpl;
 
-	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
-	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
 	nested_vmcb->control.int_state         = vmcb->control.int_state;
 	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
 	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
@@ -510,34 +537,16 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 	if (svm->nrips_enabled)
 		nested_vmcb->control.next_rip  = vmcb->control.next_rip;
 
-	/*
-	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
-	 * to make sure that we do not lose injected events. So check event_inj
-	 * here and copy it to exit_int_info if it is valid.
-	 * Exit_int_info and event_inj can't be both valid because the case
-	 * below only happens on a VMRUN instruction intercept which has
-	 * no valid exit_int_info set.
-	 */
-	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
-		struct vmcb_control_area *nc = &nested_vmcb->control;
-
-		nc->exit_int_info     = vmcb->control.event_inj;
-		nc->exit_int_info_err = vmcb->control.event_inj_err;
-	}
-
-	nested_vmcb->control.tlb_ctl           = 0;
-	nested_vmcb->control.event_inj         = 0;
-	nested_vmcb->control.event_inj_err     = 0;
+	nested_vmcb->control.int_ctl           = svm->nested.ctl.int_ctl;
+	nested_vmcb->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
+	nested_vmcb->control.event_inj         = svm->nested.ctl.event_inj;
+	nested_vmcb->control.event_inj_err     = svm->nested.ctl.event_inj_err;
 
 	nested_vmcb->control.pause_filter_count =
 		svm->vmcb->control.pause_filter_count;
 	nested_vmcb->control.pause_filter_thresh =
 		svm->vmcb->control.pause_filter_thresh;
 
-	/* We always set V_INTR_MASKING and remember the old value in hflags */
-	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
-		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
 	/* Restore the original control entries */
 	copy_vmcb_control_area(&vmcb->control, &hsave->control);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4122ba86bac2..b710e62ace16 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3427,7 +3427,10 @@  static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 	sync_cr8_to_lapic(vcpu);
 
 	svm->next_rip = 0;
-	svm->nested.nested_run_pending = 0;
+	if (is_guest_mode(&svm->vcpu)) {
+		sync_nested_vmcb_control(svm);
+		svm->nested.nested_run_pending = 0;
+	}
 
 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index dd5418f20256..7e79f0af1204 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -394,6 +394,7 @@  int nested_svm_check_permissions(struct vcpu_svm *svm);
 int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 			       bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
+void sync_nested_vmcb_control(struct vcpu_svm *svm);
 
 extern struct kvm_x86_nested_ops svm_nested_ops;