diff mbox series

[1/8] KVM: nSVM: Sync next_rip field from vmcb12 to vmcb02

Message ID 20220402010903.727604-2-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: SVM: Fix soft int/ex re-injection | expand

Commit Message

Sean Christopherson April 2, 2022, 1:08 a.m. UTC
From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>

The next_rip field of a VMCB is *not* an output-only field for a VMRUN.
This field value (instead of the saved guest RIP) is used by the CPU for
the return address pushed on stack when injecting a software interrupt or
INT3 or INTO exception.

Make sure this field gets synced from vmcb12 to vmcb02 when entering L2 or
loading a nested state and NRIPS is exposed to L1.  If NRIPS is supported
in hardware but not exposed to L1 (nrips=0 or hidden by userspace), stuff
vmcb02's next_rip from the new L2 RIP to emulate a !NRIPS CPU (which
saves RIP on the stack as-is).

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/nested.c | 22 +++++++++++++++++++---
 arch/x86/kvm/svm/svm.h    |  1 +
 2 files changed, 20 insertions(+), 3 deletions(-)

Comments

Maxim Levitsky April 4, 2022, 9:54 a.m. UTC | #1
On Sat, 2022-04-02 at 01:08 +0000, Sean Christopherson wrote:
> From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> 
> The next_rip field of a VMCB is *not* an output-only field for a VMRUN.
> This field value (instead of the saved guest RIP) in used by the CPU for
> the return address pushed on stack when injecting a software interrupt or
> INT3 or INTO exception.
> 
> Make sure this field gets synced from vmcb12 to vmcb02 when entering L2 or
> loading a nested state and NRIPS is exposed to L1.  If NRIPS is supported
> in hardware but not exposed to L1 (nrips=0 or hidden by userspace), stuff
> vmcb02's next_rip from the new L2 RIP to emulate a !NRIPS CPU (which
> saves RIP on the stack as-is).
> 
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/svm/nested.c | 22 +++++++++++++++++++---
>  arch/x86/kvm/svm/svm.h    |  1 +
>  2 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 73b545278f5f..9a6dc2b38fcf 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -369,6 +369,7 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
>  	to->nested_ctl          = from->nested_ctl;
>  	to->event_inj           = from->event_inj;
>  	to->event_inj_err       = from->event_inj_err;
> +	to->next_rip            = from->next_rip;
>  	to->nested_cr3          = from->nested_cr3;
>  	to->virt_ext            = from->virt_ext;
>  	to->pause_filter_count  = from->pause_filter_count;
> @@ -606,7 +607,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>  	}
>  }
>  
> -static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
> +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> +					  unsigned long vmcb12_rip)
>  {
>  	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
>  	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
> @@ -660,6 +662,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
>  	vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
>  	vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
>  
> +	/*
> +	 * next_rip is consumed on VMRUN as the return address pushed on the
> +	 * stack for injected soft exceptions/interrupts.  If nrips is exposed
> +	 * to L1, take it verbatim from vmcb12.  If nrips is supported in
> +	 * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
> +	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
> +	 * prior to injecting the event).
> +	 */
> +	if (svm->nrips_enabled)
> +		vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
> +	else if (boot_cpu_has(X86_FEATURE_NRIPS))
> +		vmcb02->control.next_rip    = vmcb12_rip;
> +
>  	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
>  					      LBR_CTL_ENABLE_MASK;
>  	if (svm->lbrv_enabled)
> @@ -743,7 +758,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
>  	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
>  
>  	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm);
> +	nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
>  	nested_vmcb02_prepare_save(svm, vmcb12);
>  
>  	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> @@ -1422,6 +1437,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
>  	dst->nested_ctl           = from->nested_ctl;
>  	dst->event_inj            = from->event_inj;
>  	dst->event_inj_err        = from->event_inj_err;
> +	dst->next_rip             = from->next_rip;
>  	dst->nested_cr3           = from->nested_cr3;
>  	dst->virt_ext              = from->virt_ext;
>  	dst->pause_filter_count   = from->pause_filter_count;
> @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>  	nested_copy_vmcb_control_to_cache(svm, ctl);
>  
>  	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm);
> +	nested_vmcb02_prepare_control(svm, save->rip);
>  
>  	/*
>  	 * While the nested guest CR3 is already checked and set by
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index e246793cbeae..47e7427d0395 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -139,6 +139,7 @@ struct vmcb_ctrl_area_cached {
>  	u64 nested_ctl;
>  	u32 event_inj;
>  	u32 event_inj_err;
> +	u64 next_rip;
>  	u64 nested_cr3;
>  	u64 virt_ext;
>  	u32 clean;

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky
Maciej S. Szmigiero April 4, 2022, 4:50 p.m. UTC | #2
On 2.04.2022 03:08, Sean Christopherson wrote:
> From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> 
> The next_rip field of a VMCB is *not* an output-only field for a VMRUN.
> This field value (instead of the saved guest RIP) in used by the CPU for
> the return address pushed on stack when injecting a software interrupt or
> INT3 or INTO exception.
> 
> Make sure this field gets synced from vmcb12 to vmcb02 when entering L2 or
> loading a nested state and NRIPS is exposed to L1.  If NRIPS is supported
> in hardware but not exposed to L1 (nrips=0 or hidden by userspace), stuff
> vmcb02's next_rip from the new L2 RIP to emulate a !NRIPS CPU (which
> saves RIP on the stack as-is).
> 
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/svm/nested.c | 22 +++++++++++++++++++---
>   arch/x86/kvm/svm/svm.h    |  1 +
>   2 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 73b545278f5f..9a6dc2b38fcf 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -369,6 +369,7 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
>   	to->nested_ctl          = from->nested_ctl;
>   	to->event_inj           = from->event_inj;
>   	to->event_inj_err       = from->event_inj_err;
> +	to->next_rip            = from->next_rip;
>   	to->nested_cr3          = from->nested_cr3;
>   	to->virt_ext            = from->virt_ext;
>   	to->pause_filter_count  = from->pause_filter_count;
> @@ -606,7 +607,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>   	}
>   }
>   
> -static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
> +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> +					  unsigned long vmcb12_rip)
>   {
>   	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
>   	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
> @@ -660,6 +662,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
>   	vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
>   	vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
>   
> +	/*
> +	 * next_rip is consumed on VMRUN as the return address pushed on the
> +	 * stack for injected soft exceptions/interrupts.  If nrips is exposed
> +	 * to L1, take it verbatim from vmcb12.  If nrips is supported in
> +	 * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
> +	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
> +	 * prior to injecting the event).
> +	 */
> +	if (svm->nrips_enabled)
> +		vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
> +	else if (boot_cpu_has(X86_FEATURE_NRIPS))
> +		vmcb02->control.next_rip    = vmcb12_rip;
> +
>   	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
>   					      LBR_CTL_ENABLE_MASK;
>   	if (svm->lbrv_enabled)
> @@ -743,7 +758,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
>   	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
>   
>   	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm);
> +	nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
>   	nested_vmcb02_prepare_save(svm, vmcb12);
>   
>   	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> @@ -1422,6 +1437,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
>   	dst->nested_ctl           = from->nested_ctl;
>   	dst->event_inj            = from->event_inj;
>   	dst->event_inj_err        = from->event_inj_err;
> +	dst->next_rip             = from->next_rip;
>   	dst->nested_cr3           = from->nested_cr3;
>   	dst->virt_ext              = from->virt_ext;
>   	dst->pause_filter_count   = from->pause_filter_count;
> @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>   	nested_copy_vmcb_control_to_cache(svm, ctl);
>   
>   	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm);
> +	nested_vmcb02_prepare_control(svm, save->rip);
>   

					   ^
I guess this should be "svm->vmcb->save.rip", since
KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
not vmcb{0,1}2 (in contrast to the "control" field).


Thanks,
Maciej
Sean Christopherson April 4, 2022, 5:21 p.m. UTC | #3
On Mon, Apr 04, 2022, Maciej S. Szmigiero wrote:
> > @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> >   	nested_copy_vmcb_control_to_cache(svm, ctl);
> >   	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> > -	nested_vmcb02_prepare_control(svm);
> > +	nested_vmcb02_prepare_control(svm, save->rip);
> 
> 					   ^
> I guess this should be "svm->vmcb->save.rip", since
> KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
> not vmcb{0,1}2 (in contrast to the "control" field).

Argh, yes.  Is userspace required to set L2 guest state prior to KVM_SET_NESTED_STATE?
If not, this will result in garbage being loaded into vmcb02.
Maciej S. Szmigiero April 4, 2022, 5:45 p.m. UTC | #4
On 4.04.2022 19:21, Sean Christopherson wrote:
> On Mon, Apr 04, 2022, Maciej S. Szmigiero wrote:
>>> @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>>>    	nested_copy_vmcb_control_to_cache(svm, ctl);
>>>    	svm_switch_vmcb(svm, &svm->nested.vmcb02);
>>> -	nested_vmcb02_prepare_control(svm);
>>> +	nested_vmcb02_prepare_control(svm, save->rip);
>>
>> 					   ^
>> I guess this should be "svm->vmcb->save.rip", since
>> KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
>> not vmcb{0,1}2 (in contrast to the "control" field).
> 
> Argh, yes.  Is userspace required to set L2 guest state prior to KVM_SET_NESTED_STATE?
> If not, this will result in garbage being loaded into vmcb02.

I'm not sure about particular KVM API guarantees,
but looking at the code I guess it is supposed to handle both cases:
1) VMM loads the usual basic KVM state via KVM_SET_{S,}REGS then immediately
issues KVM_SET_NESTED_STATE to load the remaining nested data.

Assuming that it was the L2 that was running at the save time,
at first the basic L2 state will be loaded into vmcb01,
then at KVM_SET_NESTED_STATE time:
> if (is_guest_mode(vcpu))
>     svm_leave_nested(vcpu);
> else
>     svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;

The !is_guest_mode(vcpu) branch will be taken (since the new VM hasn't entered
the guest mode yet), which will copy the basic L2 state from vmcb01 to vmcb02 and
then the remaining code will restore vmcb01 save and vmcb{0,1}2 control normally and
then enter the guest mode.

2) VMM first issues KVM_SET_NESTED_STATE then immediately loads the basic state.

Same as the above, only some initial VM state will be copied into vmcb02 from vmcb01
by the code mentioned above, then vmcb01 save and vmcb{0,1}2 control will be restored
and guest mode will be entered.
If the VMM then immediately issues KVM_SET_{S,}REGS then it will restore L2 basic state
straight into vmcb02.

However, this is all my guesswork from just looking at the relevant code,
I haven't run any tests to make sure that I haven't missed something.

Thanks,
Maciej
Paolo Bonzini April 20, 2022, 3 p.m. UTC | #5
On 4/4/22 19:21, Sean Christopherson wrote:
> On Mon, Apr 04, 2022, Maciej S. Szmigiero wrote:
>>> @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>>>    	nested_copy_vmcb_control_to_cache(svm, ctl);
>>>    	svm_switch_vmcb(svm, &svm->nested.vmcb02);
>>> -	nested_vmcb02_prepare_control(svm);
>>> +	nested_vmcb02_prepare_control(svm, save->rip);
>>
>> 					   ^
>> I guess this should be "svm->vmcb->save.rip", since
>> KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
>> not vmcb{0,1}2 (in contrast to the "control" field).
> 
> Argh, yes.  Is userspace required to set L2 guest state prior to KVM_SET_NESTED_STATE?
> If not, this will result in garbage being loaded into vmcb02.
> 

Let's just require X86_FEATURE_NRIPS, either in general or just to
enable nested virtualization, i.e.:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fc1725b7d05f..f8fc8a1b09f1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4904,10 +4904,12 @@ static __init int svm_hardware_setup(void)
  			goto err;
  	}
  
-	if (nrips) {
-		if (!boot_cpu_has(X86_FEATURE_NRIPS))
-			nrips = false;
-	}
+	if (!boot_cpu_has(X86_FEATURE_NRIPS))
+		nrips = false;
+	if (nested & !nrips) {
+		pr_warn("Next RIP Save not available, disabling nested virtualization\n");
+		nested = false;
+	}
  
  	enable_apicv = avic = avic && npt_enabled && (boot_cpu_has(X86_FEATURE_AVIC) || force_avic);
  

If I looked it up correctly it was introduced around 2010-2011.

Paolo
Maciej S. Szmigiero April 20, 2022, 3:05 p.m. UTC | #6
On 20.04.2022 17:00, Paolo Bonzini wrote:
> On 4/4/22 19:21, Sean Christopherson wrote:
>> On Mon, Apr 04, 2022, Maciej S. Szmigiero wrote:
>>>> @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>>>>        nested_copy_vmcb_control_to_cache(svm, ctl);
>>>>        svm_switch_vmcb(svm, &svm->nested.vmcb02);
>>>> -    nested_vmcb02_prepare_control(svm);
>>>> +    nested_vmcb02_prepare_control(svm, save->rip);
>>>
>>>                        ^
>>> I guess this should be "svm->vmcb->save.rip", since
>>> KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
>>> not vmcb{0,1}2 (in contrast to the "control" field).
>>
>> Argh, yes.  Is userspace required to set L2 guest state prior to KVM_SET_NESTED_STATE?
>> If not, this will result in garbage being loaded into vmcb02.
>>
> 
> Let's just require X86_FEATURE_NRIPS, either in general or just to
> enable nested virtualiazation


Sean Christopherson April 20, 2022, 4:15 p.m. UTC | #7
On Wed, Apr 20, 2022, Maciej S. Szmigiero wrote:
> On 20.04.2022 17:00, Paolo Bonzini wrote:
> > On 4/4/22 19:21, Sean Christopherson wrote:
> > > On Mon, Apr 04, 2022, Maciej S. Szmigiero wrote:
> > > > > @@ -1606,7 +1622,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> > > > >        nested_copy_vmcb_control_to_cache(svm, ctl);
> > > > >        svm_switch_vmcb(svm, &svm->nested.vmcb02);
> > > > > -    nested_vmcb02_prepare_control(svm);
> > > > > +    nested_vmcb02_prepare_control(svm, save->rip);
> > > > 
> > > >                        ^
> > > > I guess this should be "svm->vmcb->save.rip", since
> > > > KVM_{GET,SET}_NESTED_STATE "save" field contains vmcb01 data,
> > > > not vmcb{0,1}2 (in contrast to the "control" field).
> > > 
> > > Argh, yes.  Is userspace required to set L2 guest state prior to KVM_SET_NESTED_STATE?
> > > If not, this will result in garbage being loaded into vmcb02.
> > > 
> > 
> > Let's just require X86_FEATURE_NRIPS, either in general or just to
> > enable nested virtualiazation
> 
> 
Paolo Bonzini April 20, 2022, 4:33 p.m. UTC | #8
On 4/20/22 18:15, Sean Christopherson wrote:
>>> Let's just require X86_FEATURE_NRIPS, either in general or just to
>>> enable nested virtualiazation
>> 
Sean Christopherson April 20, 2022, 4:44 p.m. UTC | #9
On Wed, Apr 20, 2022, Paolo Bonzini wrote:
> On 4/20/22 18:15, Sean Christopherson wrote:
> > > > Let's just require X86_FEATURE_NRIPS, either in general or just to
> > > > enable nested virtualiazation
> > > 
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 73b545278f5f..9a6dc2b38fcf 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -369,6 +369,7 @@  void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
 	to->nested_ctl          = from->nested_ctl;
 	to->event_inj           = from->event_inj;
 	to->event_inj_err       = from->event_inj_err;
+	to->next_rip            = from->next_rip;
 	to->nested_cr3          = from->nested_cr3;
 	to->virt_ext            = from->virt_ext;
 	to->pause_filter_count  = from->pause_filter_count;
@@ -606,7 +607,8 @@  static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 	}
 }
 
-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
+static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
+					  unsigned long vmcb12_rip)
 {
 	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
 	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
@@ -660,6 +662,19 @@  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
 	vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
 
+	/*
+	 * next_rip is consumed on VMRUN as the return address pushed on the
+	 * stack for injected soft exceptions/interrupts.  If nrips is exposed
+	 * to L1, take it verbatim from vmcb12.  If nrips is supported in
+	 * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
+	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
+	 * prior to injecting the event).
+	 */
+	if (svm->nrips_enabled)
+		vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
+	else if (boot_cpu_has(X86_FEATURE_NRIPS))
+		vmcb02->control.next_rip    = vmcb12_rip;
+
 	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
 					      LBR_CTL_ENABLE_MASK;
 	if (svm->lbrv_enabled)
@@ -743,7 +758,7 @@  int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-	nested_vmcb02_prepare_control(svm);
+	nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
 	nested_vmcb02_prepare_save(svm, vmcb12);
 
 	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
@@ -1422,6 +1437,7 @@  static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
 	dst->nested_ctl           = from->nested_ctl;
 	dst->event_inj            = from->event_inj;
 	dst->event_inj_err        = from->event_inj_err;
+	dst->next_rip             = from->next_rip;
 	dst->nested_cr3           = from->nested_cr3;
 	dst->virt_ext              = from->virt_ext;
 	dst->pause_filter_count   = from->pause_filter_count;
@@ -1606,7 +1622,7 @@  static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	nested_copy_vmcb_control_to_cache(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-	nested_vmcb02_prepare_control(svm);
+	nested_vmcb02_prepare_control(svm, save->rip);
 
 	/*
 	 * While the nested guest CR3 is already checked and set by
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e246793cbeae..47e7427d0395 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -139,6 +139,7 @@  struct vmcb_ctrl_area_cached {
 	u64 nested_ctl;
 	u32 event_inj;
 	u32 event_inj_err;
+	u64 next_rip;
 	u64 nested_cr3;
 	u64 virt_ext;
 	u32 clean;