diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -457,6 +457,12 @@ struct vmx_msr_entry {
#define ENTRY_FAIL_VMCS_LINK_PTR 4
/*
+ * VMX Abort codes
+ */
+#define VMX_ABORT_MSR_STORE_FAILURE 1
+#define VMX_ABORT_MSR_LOAD_FAILURE 4
+
+/*
* VM-instruction error numbers
*/
enum vm_instruction_error_number {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -316,6 +316,9 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
+#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
+#define MSR_IA32_SMBASE 0x0000009e
+
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -56,6 +56,7 @@
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MSR_LOAD_FAILURE 34
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
@@ -116,6 +117,7 @@
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_MSR_LOAD_FAILURE, "MSR_LOAD_FAILURE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8571,6 +8571,173 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
}
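+
+/*
+ * Verify one MSR switch area, given the VMCS fields holding its entry
+ * count and physical address: the area must be 16-byte aligned and must
+ * lie entirely below the guest's physical-address width. An area with a
+ * zero entry count is trivially valid.
+ */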
+static bool vmx_msr_switch_area_verify(struct kvm_vcpu *vcpu,
+ unsigned long count_field,
+ unsigned long addr_field,
+ int maxphyaddr)
+{
+ u64 count, addr;
+
+ if (vmcs12_read_any(vcpu, count_field, &count) ||
+ vmcs12_read_any(vcpu, addr_field, &addr)) {
+ WARN_ON(1);
+ return false;
+ }
+ if (count == 0)
+ return true;
+ if (!IS_ALIGNED(addr, 16))
+ goto fail;
+ if (addr >> maxphyaddr)
+ goto fail;
+ if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
+ goto fail;
+ return true;
+fail:
+ pr_warn_ratelimited(
+ "nVMX: invalid MSR switch (0x%lx, 0x%lx, %d, %llu, 0x%08llx)\n",
+ count_field, addr_field, maxphyaddr, count, addr);
+ return false;
+}
+
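+/*
+ * Verify the VM-exit MSR load/store and VM-entry MSR load areas before
+ * entering L2; nested_vmx_run() fails the entry with
+ * VMXERR_ENTRY_INVALID_CONTROL_FIELD if any of them is malformed.
+ */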
+static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ int maxphyaddr;
+
+ if (vmcs12->vm_exit_msr_load_count == 0 &&
+ vmcs12->vm_exit_msr_store_count == 0 &&
+ vmcs12->vm_entry_msr_load_count == 0)
+ return true; /* Fast path */
+ maxphyaddr = cpuid_maxphyaddr(vcpu);
+ return vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_LOAD_COUNT,
+ VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) &&
+ vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_STORE_COUNT,
+ VM_EXIT_MSR_STORE_ADDR,
+ maxphyaddr) &&
+ vmx_msr_switch_area_verify(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
+ VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr);
+}
+
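+/*
+ * Check that a single entry of an MSR load area may be loaded: the SDM
+ * disallows loading MSR_FS_BASE, MSR_GS_BASE and x2APIC MSRs through
+ * the MSR-switch mechanism, and the reserved field must be zero;
+ * IA32_SMM_MONITOR_CTL is rejected here because SMM is not supported.
+ */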
+static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
+ struct vmx_msr_entry *e)
+{
+ if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
+ return false;
+ /* SMM is not supported */
+ if (e->index == MSR_IA32_SMM_MONITOR_CTL)
+ return false;
+ /* x2APIC MSR accesses are not allowed */
+ if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+ return false;
+ if (e->reserved != 0)
+ return false;
+ return true;
+}
+
+static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
+{
+ /*
+ * Intel SDM: a processor MAY prevent writing to these MSRs when
+ * loading guest states on VM entries or saving guest states on VM
+ * exits.
+ * Assume our emulated processor DOES prevent writing.
+ */
+ return msr_index == MSR_IA32_UCODE_WRITE ||
+ msr_index == MSR_IA32_UCODE_REV;
+}
+
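+/*
+ * Load MSRs from an MSR load area in L1 guest memory. Returns 0 on
+ * success, or the 1-based index of the first entry that could not be
+ * loaded; callers report that index to L1 (e.g. as the exit
+ * qualification of an EXIT_REASON_MSR_LOAD_FAILURE exit).
+ */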
+static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
+ u32 count, u64 addr)
+{
+ unsigned int i;
+ struct vmx_msr_entry msr_entry;
+ struct msr_data msr;
+
+ for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+ if (kvm_read_guest(vcpu->kvm, addr,
+ &msr_entry, sizeof(msr_entry))) {
+ pr_warn_ratelimited(
+ "%s MSR %u: cannot read GPA: 0x%llx\n",
+ __func__, i, addr);
+ return i;
+ }
+ if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
+ pr_warn_ratelimited(
+ "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
+ __func__, i, msr_entry.index,
+ msr_entry.reserved);
+ return i;
+ }
+ msr.host_initiated = false;
+ msr.index = msr_entry.index;
+ msr.data = msr_entry.value;
+ if (vmx_msr_switch_is_protected_msr(msr.index)) {
+ pr_warn_ratelimited(
+ "%s MSR %u: prevent writing to MSR 0x%x\n",
+ __func__, i, msr.index);
+ return i;
+ }
+ if (kvm_set_msr(vcpu, &msr)) {
+ pr_warn_ratelimited(
+ "%s MSR %u: cannot write 0x%llx to MSR 0x%x\n",
+ __func__, i, msr.data, msr.index);
+ return i;
+ }
+ }
+ return 0;
+}
+
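+/*
+ * Check that a single entry of the VM-exit MSR store area may be
+ * stored: x2APIC MSRs and IA32_SMBASE cannot be read this way, and the
+ * reserved field must be zero.
+ */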
+static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
+ struct vmx_msr_entry *e)
+{
+ /* x2APIC MSR accesses are not allowed */
+ if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+ return false;
+ /* SMM is not supported */
+ if (e->index == MSR_IA32_SMBASE)
+ return false;
+ if (e->reserved != 0)
+ return false;
+ return true;
+}
+
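+/*
+ * Store MSRs into the VM-exit MSR store area in L1 guest memory: for
+ * each entry, read the MSR selected by its index field and write the
+ * current value back into the entry. Returns 0 on success, or the
+ * 1-based index of the first entry that failed.
+ */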
+static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
+ u32 count, u64 addr)
+{
+ unsigned int i;
+ struct vmx_msr_entry msr_entry;
+
+ for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
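+ /*
+ * Read only the index and reserved fields (the first two u32s);
+ * the value field is filled in below from the current MSR
+ * contents.
+ */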
+ if (kvm_read_guest(vcpu->kvm, addr,
+ &msr_entry, 2 * sizeof(u32))) {
+ pr_warn_ratelimited(
+ "%s MSR %u: cannot read GPA: 0x%llx\n",
+ __func__, i, addr);
+ return i;
+ }
+ if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
+ pr_warn_ratelimited(
+ "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
+ __func__, i, msr_entry.index,
+ msr_entry.reserved);
+ return i;
+ }
+ if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.value)) {
+ pr_warn_ratelimited(
+ "%s MSR %u: cannot read MSR 0x%x\n",
+ __func__, i, msr_entry.index);
+ return i;
+ }
+ if (kvm_write_guest(vcpu->kvm,
+ addr + offsetof(struct vmx_msr_entry, value),
+ &msr_entry.value, sizeof(msr_entry.value))) {
+ pr_warn_ratelimited(
+ "%s MSR %u: cannot write to GPA: 0x%llx\n",
+ __func__, i, addr);
+ return i;
+ }
+ }
+ return 0;
+}
+
/*
* nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
* for running an L2 nested guest.
@@ -8580,6 +8747,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
+ unsigned int msr_idx;
struct loaded_vmcs *vmcs02;
bool ia32e;
@@ -8629,11 +8797,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (vmcs12->vm_entry_msr_load_count > 0 ||
- vmcs12->vm_exit_msr_load_count > 0 ||
- vmcs12->vm_exit_msr_store_count > 0) {
- pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
- __func__);
+ if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
}
@@ -8739,10 +8903,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx_segment_cache_clear(vmx);
- vmcs12->launch_state = 1;
-
prepare_vmcs02(vcpu, vmcs12);
+ msr_idx = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
+ vmcs12->vm_entry_msr_load_addr);
+ if (msr_idx) {
+ leave_guest_mode(vcpu);
+ vmx_load_vmcs01(vcpu);
+ nested_vmx_entry_failure(vcpu, vmcs12,
+ EXIT_REASON_MSR_LOAD_FAILURE, msr_idx);
+ return 1;
+ }
+
+ vmcs12->launch_state = 1;
+
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
return kvm_emulate_halt(vcpu);
@@ -9040,6 +9214,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_clear_interrupt_queue(vcpu);
}
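+
+/*
+ * Emulate a VMX abort by recording the abort code in the vmcs12 abort
+ * field. Real hardware shuts the logical processor down on a VMX
+ * abort; requesting a triple fault is the closest KVM can get for L1.
+ */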
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ vmcs12->abort = abort_code;
+ pr_warn("Nested VMX abort %u\n", abort_code);
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+}
+
/*
* A part of what we need to when the nested L2 guest exits and we want to
* run its L1 parent, is to reset L1's guest state to the host state specified
@@ -9172,6 +9355,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
kvm_set_dr(vcpu, 7, 0x400);
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+ if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
+ vmcs12->vm_exit_msr_load_addr))
+ nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD_FAILURE);
}
/*
@@ -9193,6 +9380,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
exit_qualification);
+ if (nested_vmx_store_msrs(vcpu,
+ vmcs12->vm_exit_msr_store_count,
+ vmcs12->vm_exit_msr_store_addr))
+ nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE_FAILURE);
+
vmx_load_vmcs01(vcpu);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1579,6 +1579,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
}
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_write_guest);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len)
Several hypervisors use MSR loading/storing to run guests on VMX. This patch implements emulation of this feature and allows these hypervisors to work in L1. The following is emulated: - Loading MSRs on VM-entries - Saving MSRs on VM-exits - Loading MSRs on VM-exits Actions taken on loading MSRs: - MSR load area is verified - For each MSR entry from load area: - MSR load entry is read and verified - MSR value is safely written Actions taken on storing MSRs: - MSR store area is verified - For each MSR entry from store area: - MSR entry is read and verified - MSR value is safely read using MSR index from MSR entry - MSR value is written to MSR entry The code performs checks required by Intel Software Developer Manual. Signed-off-by: Eugene Korenevsky <ekorenevsky@gmail.com> --- arch/x86/include/asm/vmx.h | 6 + arch/x86/include/uapi/asm/msr-index.h | 3 + arch/x86/include/uapi/asm/vmx.h | 2 + arch/x86/kvm/vmx.c | 206 ++++++++++++++++++++++++++++++++-- virt/kvm/kvm_main.c | 1 + 5 files changed, 211 insertions(+), 7 deletions(-)