diff mbox

[6/24] Implement reading and writing of VMX MSRs

Message ID 201006131225.o5DCPctX012929@rice.haifa.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nadav Har'El June 13, 2010, 12:25 p.m. UTC
None
diff mbox

Patch

--- .before/arch/x86/kvm/x86.c	2010-06-13 15:01:28.000000000 +0300
+++ .after/arch/x86/kvm/x86.c	2010-06-13 15:01:28.000000000 +0300
@@ -702,7 +702,11 @@  static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-	MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+	MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+	MSR_IA32_FEATURE_CONTROL,  MSR_IA32_VMX_BASIC,
+	MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS,
+	MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS,
+	MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP,
 };
 
 static unsigned num_msrs_to_save;
--- .before/arch/x86/kvm/vmx.c	2010-06-13 15:01:28.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c	2010-06-13 15:01:28.000000000 +0300
@@ -1231,6 +1231,98 @@  static void guest_write_tsc(u64 guest_ts
 }
 
 /*
+ * If we allow our guest to use VMX instructions, we should also let it use
+ * VMX-specific MSRs.
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+	u64 vmx_msr = 0;
+	u32 vmx_msr_high, vmx_msr_low;
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		/*
+		 * This MSR reports some information about VMX support of the
+		 * processor. We should return information about the VMX we
+		 * emulate for the guest, and the VMCS structure we give it -
+		 * not about the VMX support of the underlying hardware. Some
+		 * However, some capabilities of the underlying hardware are
+		 * used directly by our emulation (e.g., the physical address
+		 * width), so these are copied from what the hardware reports.
+		 */
+		*pdata = VMCS12_REVISION |
+			(((u64)sizeof(struct vmcs12)) << 32);
+		rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+#define VMX_BASIC_64		0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE	0x003c000000000000LLU
+#define VMX_BASIC_INOUT		0x0040000000000000LLU
+		*pdata |= vmx_msr &
+			(VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT);
+		break;
+#define CORE2_PINBASED_CTLS_MUST_BE_ONE  0x00000016
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS  0x48d
+	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		vmx_msr_low  = CORE2_PINBASED_CTLS_MUST_BE_ONE;
+		vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE |
+				PIN_BASED_EXT_INTR_MASK |
+				PIN_BASED_NMI_EXITING |
+				PIN_BASED_VIRTUAL_NMIS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+		/* This MSR determines which vm-execution controls the L1
+		 * hypervisor may ask, or may not ask, to enable. Normally we
+		 * can only allow enabling features which the hardware can
+		 * support, but we limit ourselves to allowing only known
+		 * features that were tested nested. We allow disabling any
+		 * feature (even if the hardware can't disable it).
+		 */
+		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+		vmx_msr_low = 0; /* allow disabling any feature */
+		vmx_msr_high &= /* do not expose new untested features */
+			CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING |
+			CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW |
+			CPU_BASED_USE_MSR_BITMAPS |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		*pdata = 0;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1278,6 +1370,8 @@  static int vmx_get_msr(struct kvm_vcpu *
 		/* Otherwise falls through */
 	default:
 		vmx_load_host_state(to_vmx(vcpu));
+		if (nested && !nested_vmx_get_msr(vcpu, msr_index, &data))
+			break;
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			vmx_load_host_state(to_vmx(vcpu));
@@ -1292,6 +1386,27 @@  static int vmx_get_msr(struct kvm_vcpu *
 }
 
 /*
+ * Writes msr value for nested virtualization
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		if ((data & (FEATURE_CONTROL_LOCKED |
+			     FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX))
+		    != (FEATURE_CONTROL_LOCKED |
+			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX))
+			return 1;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1349,6 +1464,9 @@  static int vmx_set_msr(struct kvm_vcpu *
 			return 1;
 		/* Otherwise falls through */
 	default:
+		if (nested &&
+		    !nested_vmx_set_msr(vcpu, msr_index, data))
+			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			vmx_load_host_state(vmx);