diff mbox series

KVM: nSVM: Pull CS.Base from actual VMCB12 for soft int/ex re-injection

Message ID 4caa0f67589ae3c22c311ee0e6139496902f2edc.1658159083.git.maciej.szmigiero@oracle.com (mailing list archive)
State New, archived
Headers show
Series KVM: nSVM: Pull CS.Base from actual VMCB12 for soft int/ex re-injection | expand

Commit Message

Maciej S. Szmigiero July 18, 2022, 3:47 p.m. UTC
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy
control fields from VMCB12 to the current VMCB, then
nested_vmcb02_prepare_save() to perform a similar copy of the save area.

This means that nested_vmcb02_prepare_control() still runs with the
previous save area values in the current VMCB so it shouldn't take the L2
guest CS.Base from this area.

Explicitly pull CS.Base from the actual VMCB12 instead in
enter_svm_guest_mode().

Granted, having a non-zero CS.Base is a very rare thing (and even
impossible in 64-bit mode), having it change between nested VMRUNs is
probably even rarer, but if it happens it would create a really subtle bug
so it's better to fix it upfront.

Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 arch/x86/kvm/svm/nested.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini July 19, 2022, 1:07 p.m. UTC | #1
On 7/18/22 17:47, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> 
> enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy
> control fields from VMCB12 to the current VMCB, then
> nested_vmcb02_prepare_save() to perform a similar copy of the save area.
> 
> This means that nested_vmcb02_prepare_control() still runs with the
> previous save area values in the current VMCB so it shouldn't take the L2
> guest CS.Base from this area.
> 
> Explicitly pull CS.Base from the actual VMCB12 instead in
> enter_svm_guest_mode().
> 
> Granted, having a non-zero CS.Base is a very rare thing (and even
> impossible in 64-bit mode), having it change between nested VMRUNs is
> probably even rarer, but if it happens it would create a really subtle bug
> so it's better to fix it upfront.
> 
> Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>   arch/x86/kvm/svm/nested.c | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)

Queued, thanks.

Paolo

> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index adf4120b05d90..23252ab821941 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj)
>   }
>   
>   static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> -					  unsigned long vmcb12_rip)
> +					  unsigned long vmcb12_rip,
> +					  unsigned long vmcb12_csbase)
>   {
>   	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
>   	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
> @@ -711,7 +712,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
>   	svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
>   	if (is_evtinj_soft(vmcb02->control.event_inj)) {
>   		svm->soft_int_injected = true;
> -		svm->soft_int_csbase = svm->vmcb->save.cs.base;
> +		svm->soft_int_csbase = vmcb12_csbase;
>   		svm->soft_int_old_rip = vmcb12_rip;
>   		if (svm->nrips_enabled)
>   			svm->soft_int_next_rip = svm->nested.ctl.next_rip;
> @@ -800,7 +801,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
>   	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
>   
>   	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
> +	nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
>   	nested_vmcb02_prepare_save(svm, vmcb12);
>   
>   	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> @@ -1663,7 +1664,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>   	nested_copy_vmcb_control_to_cache(svm, ctl);
>   
>   	svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -	nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip);
> +	nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
>   
>   	/*
>   	 * While the nested guest CR3 is already checked and set by
>
Maxim Levitsky July 20, 2022, 8:43 a.m. UTC | #2
On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> 
> enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy
> control fields from VMCB12 to the current VMCB, then
> nested_vmcb02_prepare_save() to perform a similar copy of the save area.
> 
> This means that nested_vmcb02_prepare_control() still runs with the
> previous save area values in the current VMCB so it shouldn't take the L2
> guest CS.Base from this area.
> 
> Explicitly pull CS.Base from the actual VMCB12 instead in
> enter_svm_guest_mode().
> 
> Granted, having a non-zero CS.Base is a very rare thing (and even
> impossible in 64-bit mode), having it change between nested VMRUNs is
> probably even rarer, but if it happens it would create a really subtle bug
> so it's better to fix it upfront.
> 
> Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>  arch/x86/kvm/svm/nested.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index adf4120b05d90..23252ab821941 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj)
>  }
>  
>  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> -                                         unsigned long vmcb12_rip)
> +                                         unsigned long vmcb12_rip,
> +                                         unsigned long vmcb12_csbase)

Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list,
because it kind of defeats the purpose of vmcb12 cache we added back then.

I think that it is better to add csbase/rip to vmcb_save_area_cached,
but I am not 100% sure. What do you think?

Best regards,
	Maxim Levitsky


>  {
>         u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
>         u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
> @@ -711,7 +712,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
>         svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
>         if (is_evtinj_soft(vmcb02->control.event_inj)) {
>                 svm->soft_int_injected = true;
> -               svm->soft_int_csbase = svm->vmcb->save.cs.base;
> +               svm->soft_int_csbase = vmcb12_csbase;
>                 svm->soft_int_old_rip = vmcb12_rip;
>                 if (svm->nrips_enabled)
>                         svm->soft_int_next_rip = svm->nested.ctl.next_rip;
> @@ -800,7 +801,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
>         nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
>  
>         svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -       nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
> +       nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
>         nested_vmcb02_prepare_save(svm, vmcb12);
>  
>         ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> @@ -1663,7 +1664,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>         nested_copy_vmcb_control_to_cache(svm, ctl);
>  
>         svm_switch_vmcb(svm, &svm->nested.vmcb02);
> -       nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip);
> +       nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
>  
>         /*
>          * While the nested guest CR3 is already checked and set by
>
Maciej S. Szmigiero July 20, 2022, 4:07 p.m. UTC | #3
On 20.07.2022 10:43, Maxim Levitsky wrote:
> On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy
>> control fields from VMCB12 to the current VMCB, then
>> nested_vmcb02_prepare_save() to perform a similar copy of the save area.
>>
>> This means that nested_vmcb02_prepare_control() still runs with the
>> previous save area values in the current VMCB so it shouldn't take the L2
>> guest CS.Base from this area.
>>
>> Explicitly pull CS.Base from the actual VMCB12 instead in
>> enter_svm_guest_mode().
>>
>> Granted, having a non-zero CS.Base is a very rare thing (and even
>> impossible in 64-bit mode), having it change between nested VMRUNs is
>> probably even rarer, but if it happens it would create a really subtle bug
>> so it's better to fix it upfront.
>>
>> Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>>   arch/x86/kvm/svm/nested.c | 9 +++++----
>>   1 file changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
>> index adf4120b05d90..23252ab821941 100644
>> --- a/arch/x86/kvm/svm/nested.c
>> +++ b/arch/x86/kvm/svm/nested.c
>> @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj)
>>   }
>>   
>>   static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
>> -                                         unsigned long vmcb12_rip)
>> +                                         unsigned long vmcb12_rip,
>> +                                         unsigned long vmcb12_csbase)
> 
> Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list,
> because it kind of defeats the purpose of vmcb12 cache we added back then.
> 
> I think that it is better to add csbase/rip to vmcb_save_area_cached,
> but I am not 100% sure. What do you think?

This function has only 3 parameters now, so they fit well into registers
without taking any extra memory (even assuming it won't get inlined).

If in the future more parameters need to be added to this function
(which may or may not happen) then they all can be moved to, for example,
vmcb_ctrl_area_cached.

> Best regards,
> 	Maxim Levitsky
> 
> 

Thanks,
Maciej
Sean Christopherson July 20, 2022, 9:34 p.m. UTC | #4
On Wed, Jul 20, 2022, Maciej S. Szmigiero wrote:
> On 20.07.2022 10:43, Maxim Levitsky wrote:
> > On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote:
> > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
> > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> > > ---
> > >   arch/x86/kvm/svm/nested.c | 9 +++++----
> > >   1 file changed, 5 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > > index adf4120b05d90..23252ab821941 100644
> > > --- a/arch/x86/kvm/svm/nested.c
> > > +++ b/arch/x86/kvm/svm/nested.c
> > > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj)
> > >   }
> > >   static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> > > -                                         unsigned long vmcb12_rip)
> > > +                                         unsigned long vmcb12_rip,
> > > +                                         unsigned long vmcb12_csbase)
> > 
> > Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list,
> > because it kind of defeats the purpose of vmcb12 cache we added back then.
> > 
> > I think that it is better to add csbase/rip to vmcb_save_area_cached,
> > but I am not 100% sure. What do you think?
> 
> This function has only 3 parameters now, so they fit well into registers
> without taking any extra memory (even assuming it won't get inlined).
> 
> If in the future more parameters need to be added to this function
> (which may or may not happen) then they all can be moved to, for example,
> vmcb_ctrl_area_cached.

I don't think Maxim is concerned about the size, rather that we have a dedicated
struct for snapshotting select save state and aren't using it.

IIRC, I deliberately avoided using the "cache" because the main/original purpose
of the cache is to avoid TOCTOU issues.  And because RIP and CS.base aren't checked,
there's no need to throw them in the cache.
Maxim Levitsky July 21, 2022, 11:51 a.m. UTC | #5
On Wed, 2022-07-20 at 21:34 +0000, Sean Christopherson wrote:
> On Wed, Jul 20, 2022, Maciej S. Szmigiero wrote:
> > On 20.07.2022 10:43, Maxim Levitsky wrote:
> > > On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote:
> > > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction")
> > > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> > > > ---
> > > >   arch/x86/kvm/svm/nested.c | 9 +++++----
> > > >   1 file changed, 5 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > > > index adf4120b05d90..23252ab821941 100644
> > > > --- a/arch/x86/kvm/svm/nested.c
> > > > +++ b/arch/x86/kvm/svm/nested.c
> > > > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj)
> > > >   }
> > > >   static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
> > > > -                                         unsigned long vmcb12_rip)
> > > > +                                         unsigned long vmcb12_rip,
> > > > +                                         unsigned long vmcb12_csbase)
> > > 
> > > Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list,
> > > because it kind of defeats the purpose of vmcb12 cache we added back then.
> > > 
> > > I think that it is better to add csbase/rip to vmcb_save_area_cached,
> > > but I am not 100% sure. What do you think?
> > 
> > This function has only 3 parameters now, so they fit well into registers
> > without taking any extra memory (even assuming it won't get inlined).
> > 
> > If in the future more parameters need to be added to this function
> > (which may or may not happen) then they all can be moved to, for example,
> > vmcb_ctrl_area_cached.
> 
> I don't think Maxim is concerned about the size, rather that we have a dedicated
> struct for snapshotting select save state and aren't using it.

Yes my thoughts exactly. The thing is that passing these values as parameters to this function
also works like a cache, but not going through the cache that we already have
for this purpose.

Anyway for now let it be, but we might need to rethink it in the future.

Best regards,
	Maxim Levitsky

> 
> IIRC, I deliberately avoided using the "cache" because the main/original purpose
> of the cache is to avoid TOCTOU issues.  And because RIP and CS.base aren't checked,
> there's no need to throw them in the cache.
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index adf4120b05d90..23252ab821941 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -639,7 +639,8 @@  static bool is_evtinj_nmi(u32 evtinj)
 }
 
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
-					  unsigned long vmcb12_rip)
+					  unsigned long vmcb12_rip,
+					  unsigned long vmcb12_csbase)
 {
 	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
 	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
@@ -711,7 +712,7 @@  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
 	if (is_evtinj_soft(vmcb02->control.event_inj)) {
 		svm->soft_int_injected = true;
-		svm->soft_int_csbase = svm->vmcb->save.cs.base;
+		svm->soft_int_csbase = vmcb12_csbase;
 		svm->soft_int_old_rip = vmcb12_rip;
 		if (svm->nrips_enabled)
 			svm->soft_int_next_rip = svm->nested.ctl.next_rip;
@@ -800,7 +801,7 @@  int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-	nested_vmcb02_prepare_control(svm, vmcb12->save.rip);
+	nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
 	nested_vmcb02_prepare_save(svm, vmcb12);
 
 	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
@@ -1663,7 +1664,7 @@  static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	nested_copy_vmcb_control_to_cache(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-	nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip);
+	nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
 
 	/*
 	 * While the nested guest CR3 is already checked and set by