diff mbox series

[v2,1/2] KVM: nVMX: don't use vcpu->arch.efer when checking host state on nested state load

Message ID 20211115131837.195527-2-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series VMX: nested migration fixes for 32 bit nested guests | expand

Commit Message

Maxim Levitsky Nov. 15, 2021, 1:18 p.m. UTC
When loading the nested state, due to the way qemu loads
the nested state, vcpu->arch.efer contains L2' IA32_EFER which
can be completely different from L1's IA32_EFER, thus it is
wrong to do consistency check of it vs the vmcs12 exit fields.

To fix this

1. Split the host state consistency check
between current IA32_EFER.LMA and 'host address space' bit in VMCS12 into
nested_vmx_check_address_state_size.

2. Call this check only on a normal VM entry, while skipping this call
on loading the nested state.

3. Trust the 'host address space' bit to contain correct ia32e
value on loading the nested state as it is the best value of
it at that point.
Still do a consistency check of it vs host_ia32_efer in vmcs12.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

Comments

Sean Christopherson Nov. 15, 2021, 3:50 p.m. UTC | #1
On Mon, Nov 15, 2021, Maxim Levitsky wrote:
> When loading the nested state, due to the way qemu loads
> the nested state, vcpu->arch.efer contains L2' IA32_EFER which
> can be completely different from L1's IA32_EFER, thus it is
> wrong to do consistency check of it vs the vmcs12 exit fields.

This is not sufficient justification.  It makes it sound like KVM is hacking
around a bug in its ABI, which it is not, but that fact is _very_ subtle.  The
"trust" blurb in bullet (3) in particular is misleading.

Instead, I would like something like:

  When loading nested state, don't use check vcpu->arch.efer to get the
  L1 host's 64-bit vs. 32-bit state and don't check it for consistency
  with respect to VM_EXIT_HOST_ADDR_SPACE_SIZE, as register state in vCPU
  may be stale when KVM_SET_NESTED_STATE is called and conceptually does
  not exist.  When the CPU is in non-root mode, i.e. when restoring L2
  state in KVM, there is no snapshot of L1 host state, it is (conditionally)
  loaded on VM-Exit.  E.g. EFER is either preserved on exit, loaded from the
  VMCS (vmcs12 in this case), or loaded from the MSR load list.

  Use vmcs12.VM_EXIT_HOST_ADDR_SPACE_SIZE to determine the target mode of
  the L1 host, as it is the source of truth in this case.  Perform the EFER
  vs. vmcs12.VM_EXIT_HOST_ADDR_SPACE_SIZE consistency check only on VM-Enter,
  as conceptually there's no "current" L1 EFER to check.

  Note, KVM still checks vmcs12.HOST_EFER for consistency if
  if vmcs12.VM_EXIT_LOAD_IA32_EFER is set, i.e. this skips only the check
  against current vCPU state, which does not exist, when loading nested state.

> To fix this
> 
> 1. Split the host state consistency check
> between current IA32_EFER.LMA and 'host address space' bit in VMCS12 into
> nested_vmx_check_address_state_size.
> 
> 2. Call this check only on a normal VM entry, while skipping this call
> on loading the nested state.
> 
> 3. Trust the 'host address space' bit to contain correct ia32e
> value on loading the nested state as it is the best value of
> it at that point.
> Still do a consistency check of it vs host_ia32_efer in vmcs12.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index b4ee5e9f9e201..7b1d5510a7cdc 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2866,6 +2866,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> +static int nested_vmx_check_address_state_size(struct kvm_vcpu *vcpu,
> +				       struct vmcs12 *vmcs12)

Bad indentation.

> +{
> +#ifdef CONFIG_X86_64
> +	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
> +		!!(vcpu->arch.efer & EFER_LMA)))

Bad indentation.  The number of !'s is also unnecessary.  This also needs a comment
explaining why it's not included in the KVM_SET_NESTED_STATE path.

> +		return -EINVAL;
> +#endif
> +	return 0;
> +}
> +
>  static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
>  				       struct vmcs12 *vmcs12)
>  {
> @@ -2890,18 +2901,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
>  		return -EINVAL;
>  
>  #ifdef CONFIG_X86_64
> -	ia32e = !!(vcpu->arch.efer & EFER_LMA);
> +	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);


>  #else
>  	ia32e = false;
>  #endif
>  
>  	if (ia32e) {
> -		if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
> -		    CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
> +		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
>  			return -EINVAL;
>  	} else {
> -		if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
> -		    CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
> +		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
>  		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
>  		    CC((vmcs12->host_rip) >> 32))
>  			return -EINVAL;
> @@ -3571,6 +3580,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	if (nested_vmx_check_controls(vcpu, vmcs12))
>  		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>  
> +	if (nested_vmx_check_address_state_size(vcpu, vmcs12))
> +		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
> +
>  	if (nested_vmx_check_host_state(vcpu, vmcs12))
>  		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
>  
> -- 
> 2.26.3
>
Paolo Bonzini Nov. 16, 2021, 10:03 a.m. UTC | #2
On 11/15/21 16:50, Sean Christopherson wrote:
>    When loading nested state, don't use check vcpu->arch.efer to get the
>    L1 host's 64-bit vs. 32-bit state and don't check it for consistency
>    with respect to VM_EXIT_HOST_ADDR_SPACE_SIZE, as register state in vCPU
>    may be stale when KVM_SET_NESTED_STATE is called and conceptually does
>    not exist.  When the CPU is in non-root mode, i.e. when restoring L2
>    state in KVM, there is no snapshot of L1 host state, it is (conditionally)
>    loaded on VM-Exit.  E.g. EFER is either preserved on exit, loaded from the
>    VMCS (vmcs12 in this case), or loaded from the MSR load list.
> 
>    Use vmcs12.VM_EXIT_HOST_ADDR_SPACE_SIZE to determine the target mode of
>    the L1 host, as it is the source of truth in this case.  Perform the EFER
>    vs. vmcs12.VM_EXIT_HOST_ADDR_SPACE_SIZE consistency check only on VM-Enter,
>    as conceptually there's no "current" L1 EFER to check.
> 
>    Note, KVM still checks vmcs12.HOST_EFER for consistency if
>    if vmcs12.VM_EXIT_LOAD_IA32_EFER is set, i.e. this skips only the check
>    against current vCPU state, which does not exist, when loading nested state.

Queued with some further edits and nested_vmx_check_address_state_size 
renamed to nested_vmx_check_address_*space*_size.

I think the "!!" are best left in place though, because "!!(a & b)" is 
idiomatic. Comparing "!(a & b)" would leave the reader wondering about 
the inversion, and "(bool)(a & b)" is just too ugly and magic.  The 
compiler anyway converts the "!!" to "!= 0" very early on, and never 
performs back-to-back logical NOTs.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b4ee5e9f9e201..7b1d5510a7cdc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2866,6 +2866,17 @@  static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_address_state_size(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+		!!(vcpu->arch.efer & EFER_LMA)))
+		return -EINVAL;
+#endif
+	return 0;
+}
+
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
@@ -2890,18 +2901,16 @@  static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 #ifdef CONFIG_X86_64
-	ia32e = !!(vcpu->arch.efer & EFER_LMA);
+	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 #else
 	ia32e = false;
 #endif
 
 	if (ia32e) {
-		if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
-		    CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
 			return -EINVAL;
 	} else {
-		if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
-		    CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
 		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
 		    CC((vmcs12->host_rip) >> 32))
 			return -EINVAL;
@@ -3571,6 +3580,9 @@  static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (nested_vmx_check_controls(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
+	if (nested_vmx_check_address_state_size(vcpu, vmcs12))
+		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
 	if (nested_vmx_check_host_state(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);