[1/2] kvm: vmx: Add IA32_FLUSH_CMD guest support
diff mbox series

Message ID 20180814173049.21756-1-jmattson@google.com
State New
Headers show
Series
  • [1/2] kvm: vmx: Add IA32_FLUSH_CMD guest support
Related show

Commit Message

Jim Mattson Aug. 14, 2018, 5:30 p.m. UTC
Expose IA32_FLUSH_CMD to the guest if the guest CPUID enumerates
support for this MSR. As with IA32_PRED_CMD, permission for
unintercepted writes to this MSR will be granted to the guest after
the first non-zero write.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Ben Serebrin <serebrin@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
---
 arch/x86/kvm/vmx.c | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

Comments

Konrad Rzeszutek Wilk Aug. 14, 2018, 6:52 p.m. UTC | #1
On Tue, Aug 14, 2018 at 10:30:48AM -0700, Jim Mattson wrote:
> Expose IA32_FLUSH_CMD to the guest if the guest CPUID enumerates
> support for this MSR. As with IA32_PRED_CMD, permission for
> unintercepted writes to this MSR will be granted to the guest after
> the first non-zero write.

I can't seem to find the Intel docs (even thought all the pages
are all set), but my understanding is that the ARCH_CAPABILITIES Bit(3)
would suffice. That is it says that for nested OSes you shouldn't
use the IA32_FLUSH_CMD?


> 
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Ben Serebrin <serebrin@google.com>
> Reviewed-by: Peter Shier <pshier@google.com>
> ---
>  arch/x86/kvm/vmx.c | 44 +++++++++++++++++++++++++++++++++++---------
>  1 file changed, 35 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 46b428c0990e0..1b40265376641 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3996,6 +3996,28 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  
>  static void vmx_leave_nested(struct kvm_vcpu *vcpu);
>  
> +static bool valid_msr_setting(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> +	bool valid;
> +
> +	switch (msr_info->index) {
> +	case MSR_IA32_PRED_CMD:
> +		valid = (msr_info->host_initiated ||
> +			 guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) &&
> +			!(msr_info->data & ~PRED_CMD_IBPB);
> +		break;
> +	case MSR_IA32_FLUSH_CMD:
> +		valid = (msr_info->host_initiated ||
> +			 guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)) &&
> +			!(msr_info->data & ~L1D_FLUSH);
> +		break;
> +	default:
> +		valid = false;
> +		break;
> +	}
> +	return valid;
> +}
> +
>  /*
>   * Writes msr value into into the appropriate "register".
>   * Returns 0 on success, non-0 otherwise.
> @@ -4077,17 +4099,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  					      MSR_TYPE_RW);
>  		break;
>  	case MSR_IA32_PRED_CMD:
> -		if (!msr_info->host_initiated &&
> -		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> -			return 1;
> -
> -		if (data & ~PRED_CMD_IBPB)
> +	case MSR_IA32_FLUSH_CMD:
> +		if (!valid_msr_setting(vcpu, msr_info))
>  			return 1;
>  
>  		if (!data)
>  			break;
>  
> -		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> +		wrmsrl(msr_index, data);
>  
>  		/*
>  		 * For non-nested:
> @@ -4100,7 +4119,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * vmcs02.msr_bitmap here since it gets completely overwritten
>  		 * in the merging.
>  		 */
> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
> +		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, msr_index,
>  					      MSR_TYPE_W);
>  		break;
>  	case MSR_IA32_ARCH_CAPABILITIES:
> @@ -11089,7 +11108,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>  	unsigned long *msr_bitmap_l1;
>  	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
>  	/*
> -	 * pred_cmd & spec_ctrl are trying to verify two things:
> +	 * pred_cmd, flush_cmd & spec_ctrl are trying to verify two things:
>  	 *
>  	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
>  	 *    ensures that we do not accidentally generate an L02 MSR bitmap
> @@ -11102,6 +11121,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>  	 *    the MSR.
>  	 */
>  	bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
> +	bool flush_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_FLUSH_CMD);
>  	bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
>  
>  	/* Nothing to do if the MSR bitmap is not in use.  */
> @@ -11110,7 +11130,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>  		return false;
>  
>  	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
> -	    !pred_cmd && !spec_ctrl)
> +	    !pred_cmd && !flush_cmd && !spec_ctrl)
>  		return false;
>  
>  	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
> @@ -11165,6 +11185,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>  					MSR_IA32_PRED_CMD,
>  					MSR_TYPE_W);
>  
> +	if (flush_cmd)
> +		nested_vmx_disable_intercept_for_msr(
> +					msr_bitmap_l1, msr_bitmap_l0,
> +					MSR_IA32_FLUSH_CMD,
> +					MSR_TYPE_W);
> +
>  	kunmap(page);
>  	kvm_release_page_clean(page);
>  
> -- 
> 2.18.0.865.gffc8e1a3cd6-goog
>
Jim Mattson Aug. 14, 2018, 7:30 p.m. UTC | #2
On Tue, Aug 14, 2018 at 11:52 AM, Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
> On Tue, Aug 14, 2018 at 10:30:48AM -0700, Jim Mattson wrote:
>> Expose IA32_FLUSH_CMD to the guest if the guest CPUID enumerates
>> support for this MSR. As with IA32_PRED_CMD, permission for
>> unintercepted writes to this MSR will be granted to the guest after
>> the first non-zero write.
>
> I can't seem to find the Intel docs (even thought all the pages
> are all set), but my understanding is that the ARCH_CAPABILITIES Bit(3)
> would suffice. That is it says that for nested OSes you shouldn't
> use the IA32_FLUSH_CMD?

If the L0 hypervisor performs an L1D$ flush on every emulated VM-entry
from L1 to L2 (which kvm may or may not do, depending on its
configuration) and if it reports that in
IA32_ARCH_CAPABILITIES.SKIP_L1DFL_VMENTRY[bit 3], then it would be
pointless for the L1 hypervisor to use IA32_FLUSH_CMD on VM-entry to
L2. However, (a) the L0 hypervisor may not perform an L1D$ flush on
every emulated VM-entry from L1 to L2, and (b) there may be other
reasons that an L1 guest (hypervisor or not) wants to perform an L1D$
flush using IA32_FLUSH_CMD. It is more forward-thinking to provide
this capability than it is not to. And it's all configurable from
userspace, so if you don't want it, you don't have to enable it.

Patch
diff mbox series

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46b428c0990e0..1b40265376641 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3996,6 +3996,28 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static void vmx_leave_nested(struct kvm_vcpu *vcpu);
 
+static bool valid_msr_setting(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	bool valid;
+
+	switch (msr_info->index) {
+	case MSR_IA32_PRED_CMD:
+		valid = (msr_info->host_initiated ||
+			 guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) &&
+			!(msr_info->data & ~PRED_CMD_IBPB);
+		break;
+	case MSR_IA32_FLUSH_CMD:
+		valid = (msr_info->host_initiated ||
+			 guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)) &&
+			!(msr_info->data & ~L1D_FLUSH);
+		break;
+	default:
+		valid = false;
+		break;
+	}
+	return valid;
+}
+
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -4077,17 +4099,14 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					      MSR_TYPE_RW);
 		break;
 	case MSR_IA32_PRED_CMD:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
-			return 1;
-
-		if (data & ~PRED_CMD_IBPB)
+	case MSR_IA32_FLUSH_CMD:
+		if (!valid_msr_setting(vcpu, msr_info))
 			return 1;
 
 		if (!data)
 			break;
 
-		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+		wrmsrl(msr_index, data);
 
 		/*
 		 * For non-nested:
@@ -4100,7 +4119,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * vmcs02.msr_bitmap here since it gets completely overwritten
 		 * in the merging.
 		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, msr_index,
 					      MSR_TYPE_W);
 		break;
 	case MSR_IA32_ARCH_CAPABILITIES:
@@ -11089,7 +11108,7 @@  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	unsigned long *msr_bitmap_l1;
 	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 	/*
-	 * pred_cmd & spec_ctrl are trying to verify two things:
+	 * pred_cmd, flush_cmd & spec_ctrl are trying to verify two things:
 	 *
 	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
 	 *    ensures that we do not accidentally generate an L02 MSR bitmap
@@ -11102,6 +11121,7 @@  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	 *    the MSR.
 	 */
 	bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+	bool flush_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_FLUSH_CMD);
 	bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
 
 	/* Nothing to do if the MSR bitmap is not in use.  */
@@ -11110,7 +11130,7 @@  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 		return false;
 
 	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
-	    !pred_cmd && !spec_ctrl)
+	    !pred_cmd && !flush_cmd && !spec_ctrl)
 		return false;
 
 	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -11165,6 +11185,12 @@  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 					MSR_IA32_PRED_CMD,
 					MSR_TYPE_W);
 
+	if (flush_cmd)
+		nested_vmx_disable_intercept_for_msr(
+					msr_bitmap_l1, msr_bitmap_l0,
+					MSR_IA32_FLUSH_CMD,
+					MSR_TYPE_W);
+
 	kunmap(page);
 	kvm_release_page_clean(page);