diff mbox

[v3,1/5] KVM: nVMX: generate non-true VMX MSRs based on true versions

Message ID 1480472050-58023-2-git-send-email-dmatlack@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Matlack Nov. 30, 2016, 2:14 a.m. UTC
The "non-true" VMX capability MSRs can be generated from their "true"
counterparts, by OR-ing the default1 bits. The default1 bits are fixed
and defined in the SDM.

Since we can generate the non-true VMX MSRs from the true versions,
there's no need to store both in struct nested_vmx. This also lets
userspace avoid having to restore the non-true MSRs.

Note this does not preclude emulating MSR_IA32_VMX_BASIC[55]=0. To do so,
we simply need to set all the default1 bits in the true MSRs (such that
the true MSRs and the generated non-true MSRs are equal).

Signed-off-by: David Matlack <dmatlack@google.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

Comments

Paolo Bonzini Nov. 30, 2016, 11:16 a.m. UTC | #1
On 30/11/2016 03:14, David Matlack wrote:
> The "non-true" VMX capability MSRs can be generated from their "true"
> counterparts, by OR-ing the default1 bits. The default1 bits are fixed
> and defined in the SDM.
> 
> Since we can generate the non-true VMX MSRs from the true versions,
> there's no need to store both in struct nested_vmx. This also lets
> userspace avoid having to restore the non-true MSRs.
> 
> Note this does not preclude emulating MSR_IA32_VMX_BASIC[55]=0. To do so,
> we simply need to set all the default1 bits in the true MSRs (such that
> the true MSRs and the generated non-true MSRs are equal).
> 
> Signed-off-by: David Matlack <dmatlack@google.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 45 +++++++++++++++++++--------------------------
>  1 file changed, 19 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5382b82..0beb56a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -446,19 +446,21 @@ struct nested_vmx {
>  	u16 vpid02;
>  	u16 last_vpid;
>  
> +	/*
> +	 * We only store the "true" versions of the VMX capability MSRs. We
> +	 * generate the "non-true" versions by setting the must-be-1 bits
> +	 * according to the SDM.
> +	 */
>  	u32 nested_vmx_procbased_ctls_low;
>  	u32 nested_vmx_procbased_ctls_high;
> -	u32 nested_vmx_true_procbased_ctls_low;
>  	u32 nested_vmx_secondary_ctls_low;
>  	u32 nested_vmx_secondary_ctls_high;
>  	u32 nested_vmx_pinbased_ctls_low;
>  	u32 nested_vmx_pinbased_ctls_high;
>  	u32 nested_vmx_exit_ctls_low;
>  	u32 nested_vmx_exit_ctls_high;
> -	u32 nested_vmx_true_exit_ctls_low;
>  	u32 nested_vmx_entry_ctls_low;
>  	u32 nested_vmx_entry_ctls_high;
> -	u32 nested_vmx_true_entry_ctls_low;
>  	u32 nested_vmx_misc_low;
>  	u32 nested_vmx_misc_high;
>  	u32 nested_vmx_ept_caps;
> @@ -2712,9 +2714,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  		vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
>  
>  	/* We support free control of debug control saving. */
> -	vmx->nested.nested_vmx_true_exit_ctls_low =
> -		vmx->nested.nested_vmx_exit_ctls_low &
> -		~VM_EXIT_SAVE_DEBUG_CONTROLS;
> +	vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
>  
>  	/* entry controls */
>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> @@ -2733,9 +2733,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  		vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
>  
>  	/* We support free control of debug control loading. */
> -	vmx->nested.nested_vmx_true_entry_ctls_low =
> -		vmx->nested.nested_vmx_entry_ctls_low &
> -		~VM_ENTRY_LOAD_DEBUG_CONTROLS;
> +	vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
>  
>  	/* cpu-based controls */
>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> @@ -2768,8 +2766,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  		CPU_BASED_USE_MSR_BITMAPS;
>  
>  	/* We support free control of CR3 access interception. */
> -	vmx->nested.nested_vmx_true_procbased_ctls_low =
> -		vmx->nested.nested_vmx_procbased_ctls_low &
> +	vmx->nested.nested_vmx_procbased_ctls_low &=
>  		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
>  
>  	/* secondary cpu-based controls */
> @@ -2868,36 +2865,32 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
>  		*pdata = vmx_control_msr(
>  			vmx->nested.nested_vmx_pinbased_ctls_low,
>  			vmx->nested.nested_vmx_pinbased_ctls_high);
> +		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
> +			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;

Almost: PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR must be
added to both the low and high parts.  Likewise below.
I guess you can use vmx_control_msr to generate it, too.

Paolo

>  		break;
>  	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
> -		*pdata = vmx_control_msr(
> -			vmx->nested.nested_vmx_true_procbased_ctls_low,
> -			vmx->nested.nested_vmx_procbased_ctls_high);
> -		break;
>  	case MSR_IA32_VMX_PROCBASED_CTLS:
>  		*pdata = vmx_control_msr(
>  			vmx->nested.nested_vmx_procbased_ctls_low,
>  			vmx->nested.nested_vmx_procbased_ctls_high);
> +		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
> +			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
>  		break;
>  	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
> -		*pdata = vmx_control_msr(
> -			vmx->nested.nested_vmx_true_exit_ctls_low,
> -			vmx->nested.nested_vmx_exit_ctls_high);
> -		break;
>  	case MSR_IA32_VMX_EXIT_CTLS:
>  		*pdata = vmx_control_msr(
>  			vmx->nested.nested_vmx_exit_ctls_low,
>  			vmx->nested.nested_vmx_exit_ctls_high);
> +		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
> +			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>  		break;
>  	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
> -		*pdata = vmx_control_msr(
> -			vmx->nested.nested_vmx_true_entry_ctls_low,
> -			vmx->nested.nested_vmx_entry_ctls_high);
> -		break;
>  	case MSR_IA32_VMX_ENTRY_CTLS:
>  		*pdata = vmx_control_msr(
>  			vmx->nested.nested_vmx_entry_ctls_low,
>  			vmx->nested.nested_vmx_entry_ctls_high);
> +		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
> +			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>  		break;
>  	case MSR_IA32_VMX_MISC:
>  		*pdata = vmx_control_msr(
> @@ -10184,7 +10177,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	}
>  
>  	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
> -				vmx->nested.nested_vmx_true_procbased_ctls_low,
> +				vmx->nested.nested_vmx_procbased_ctls_low,
>  				vmx->nested.nested_vmx_procbased_ctls_high) ||
>  	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
>  				vmx->nested.nested_vmx_secondary_ctls_low,
> @@ -10193,10 +10186,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  				vmx->nested.nested_vmx_pinbased_ctls_low,
>  				vmx->nested.nested_vmx_pinbased_ctls_high) ||
>  	    !vmx_control_verify(vmcs12->vm_exit_controls,
> -				vmx->nested.nested_vmx_true_exit_ctls_low,
> +				vmx->nested.nested_vmx_exit_ctls_low,
>  				vmx->nested.nested_vmx_exit_ctls_high) ||
>  	    !vmx_control_verify(vmcs12->vm_entry_controls,
> -				vmx->nested.nested_vmx_true_entry_ctls_low,
> +				vmx->nested.nested_vmx_entry_ctls_low,
>  				vmx->nested.nested_vmx_entry_ctls_high))
>  	{
>  		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack Nov. 30, 2016, 6:05 p.m. UTC | #2
On Wed, Nov 30, 2016 at 3:16 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 30/11/2016 03:14, David Matlack wrote:
>>
>>       /* secondary cpu-based controls */
>> @@ -2868,36 +2865,32 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
>>               *pdata = vmx_control_msr(
>>                       vmx->nested.nested_vmx_pinbased_ctls_low,
>>                       vmx->nested.nested_vmx_pinbased_ctls_high);
>> +             if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
>> +                     *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
>
> Almost: PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR must be
> added to both the low and high parts.  Likewise below.
> I guess you can use vmx_control_msr to generate it, too.

SGTM.

Although that would mean the true MSRs indicate a bit is must-be-0 while
the non-true MSRs indicate it is must-be-1, which seems odd.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Nov. 30, 2016, 8:45 p.m. UTC | #3
----- Original Message -----
> From: "David Matlack" <dmatlack@google.com>
> To: "Paolo Bonzini" <pbonzini@redhat.com>
> Cc: "kvm list" <kvm@vger.kernel.org>, linux-kernel@vger.kernel.org, "Jim Mattson" <jmattson@google.com>, "Radim
> Krčmář" <rkrcmar@redhat.com>
> Sent: Wednesday, November 30, 2016 7:05:04 PM
> Subject: Re: [PATCH v3 1/5] KVM: nVMX: generate non-true VMX MSRs based on true versions
> 
> On Wed, Nov 30, 2016 at 3:16 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> > On 30/11/2016 03:14, David Matlack wrote:
> >>
> >>       /* secondary cpu-based controls */
> >> @@ -2868,36 +2865,32 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu,
> >> u32 msr_index, u64 *pdata)
> >>               *pdata = vmx_control_msr(
> >>                       vmx->nested.nested_vmx_pinbased_ctls_low,
> >>                       vmx->nested.nested_vmx_pinbased_ctls_high);
> >> +             if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
> >> +                     *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
> >
> > Almost: PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR must be
> > added to both the low and high parts.  Likewise below.
> > I guess you can use vmx_control_msr to generate it, too.
> 
> SGTM.
> 
> Although that would mean the true MSRs indicate a bit must-be-0 while
> the non-true MSRs are indicating it must-be-1, which seems odd.

You're right. The high part is "can be 1", so the true MSR's high part
must already include the always-on-without-true-MSR bits.  Good!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82..0beb56a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,19 +446,21 @@  struct nested_vmx {
 	u16 vpid02;
 	u16 last_vpid;
 
+	/*
+	 * We only store the "true" versions of the VMX capability MSRs. We
+	 * generate the "non-true" versions by setting the must-be-1 bits
+	 * according to the SDM.
+	 */
 	u32 nested_vmx_procbased_ctls_low;
 	u32 nested_vmx_procbased_ctls_high;
-	u32 nested_vmx_true_procbased_ctls_low;
 	u32 nested_vmx_secondary_ctls_low;
 	u32 nested_vmx_secondary_ctls_high;
 	u32 nested_vmx_pinbased_ctls_low;
 	u32 nested_vmx_pinbased_ctls_high;
 	u32 nested_vmx_exit_ctls_low;
 	u32 nested_vmx_exit_ctls_high;
-	u32 nested_vmx_true_exit_ctls_low;
 	u32 nested_vmx_entry_ctls_low;
 	u32 nested_vmx_entry_ctls_high;
-	u32 nested_vmx_true_entry_ctls_low;
 	u32 nested_vmx_misc_low;
 	u32 nested_vmx_misc_high;
 	u32 nested_vmx_ept_caps;
@@ -2712,9 +2714,7 @@  static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 	/* We support free control of debug control saving. */
-	vmx->nested.nested_vmx_true_exit_ctls_low =
-		vmx->nested.nested_vmx_exit_ctls_low &
-		~VM_EXIT_SAVE_DEBUG_CONTROLS;
+	vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2733,9 +2733,7 @@  static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 	/* We support free control of debug control loading. */
-	vmx->nested.nested_vmx_true_entry_ctls_low =
-		vmx->nested.nested_vmx_entry_ctls_low &
-		~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+	vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2768,8 +2766,7 @@  static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		CPU_BASED_USE_MSR_BITMAPS;
 
 	/* We support free control of CR3 access interception. */
-	vmx->nested.nested_vmx_true_procbased_ctls_low =
-		vmx->nested.nested_vmx_procbased_ctls_low &
+	vmx->nested.nested_vmx_procbased_ctls_low &=
 		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
 	/* secondary cpu-based controls */
@@ -2868,36 +2865,32 @@  static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		*pdata = vmx_control_msr(
 			vmx->nested.nested_vmx_pinbased_ctls_low,
 			vmx->nested.nested_vmx_pinbased_ctls_high);
+		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
+			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_true_procbased_ctls_low,
-			vmx->nested.nested_vmx_procbased_ctls_high);
-		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS:
 		*pdata = vmx_control_msr(
 			vmx->nested.nested_vmx_procbased_ctls_low,
 			vmx->nested.nested_vmx_procbased_ctls_high);
+		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
+			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_true_exit_ctls_low,
-			vmx->nested.nested_vmx_exit_ctls_high);
-		break;
 	case MSR_IA32_VMX_EXIT_CTLS:
 		*pdata = vmx_control_msr(
 			vmx->nested.nested_vmx_exit_ctls_low,
 			vmx->nested.nested_vmx_exit_ctls_high);
+		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
+			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_true_entry_ctls_low,
-			vmx->nested.nested_vmx_entry_ctls_high);
-		break;
 	case MSR_IA32_VMX_ENTRY_CTLS:
 		*pdata = vmx_control_msr(
 			vmx->nested.nested_vmx_entry_ctls_low,
 			vmx->nested.nested_vmx_entry_ctls_high);
+		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
+			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_MISC:
 		*pdata = vmx_control_msr(
@@ -10184,7 +10177,7 @@  static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	}
 
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-				vmx->nested.nested_vmx_true_procbased_ctls_low,
+				vmx->nested.nested_vmx_procbased_ctls_low,
 				vmx->nested.nested_vmx_procbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
 				vmx->nested.nested_vmx_secondary_ctls_low,
@@ -10193,10 +10186,10 @@  static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 				vmx->nested.nested_vmx_pinbased_ctls_low,
 				vmx->nested.nested_vmx_pinbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_exit_controls,
-				vmx->nested.nested_vmx_true_exit_ctls_low,
+				vmx->nested.nested_vmx_exit_ctls_low,
 				vmx->nested.nested_vmx_exit_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_entry_controls,
-				vmx->nested.nested_vmx_true_entry_ctls_low,
+				vmx->nested.nested_vmx_entry_ctls_low,
 				vmx->nested.nested_vmx_entry_ctls_high))
 	{
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);