@@ -2593,11 +2593,38 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
-/* Reject the instructions other than VMCALL/VMMCALL when
- * try to emulate invalid opcode */
+/* Only allow VMCALL/VMMCALL, SYSCALL, SYSENTER and SYSEXIT to be
+ * emulated when we trap on an invalid opcode (#UD). */
c = &vcpu->arch.emulate_ctxt.decode;
- if ((emulation_type & EMULTYPE_TRAP_UD) &&
- (!(c->twobyte && c->b == 0x01 &&
- (c->modrm_reg == 0 || c->modrm_reg == 3) &&
- c->modrm_mod == 3 && c->modrm_rm == 1)))
- return EMULATE_FAIL;
+
+ if (emulation_type & EMULTYPE_TRAP_UD) {
+ if (!c->twobyte)
+ return EMULATE_FAIL;
+ switch (c->b) {
+ case 0x01: /* VMCALL / VMMCALL */
+ if (c->modrm_mod != 3)
+ return EMULATE_FAIL;
+ if (c->modrm_rm != 1)
+ return EMULATE_FAIL;
+ break;
+ case 0x34: /* sysenter */
+ case 0x35: /* sysexit */
+ if (c->modrm_mod != 0)
+ return EMULATE_FAIL;
+ if (c->modrm_rm != 0)
+ return EMULATE_FAIL;
+ break;
+ case 0x05: /* syscall */
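+ /* 'r' is tested by the 'if (r)' failure path below;
+ * clear it so that path is not taken for SYSCALL.
+ */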
+ r = 0;
+ if (c->modrm_mod != 0)
+ return EMULATE_FAIL;
+ if (c->modrm_rm != 0)
+ return EMULATE_FAIL;
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+ return EMULATE_FAIL;
+ }
++vcpu->stat.insn_emulation;
if (r) {
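
For reference, the ModRM checks above follow directly from the instruction
encodings: VMCALL is 0f 01 c1 (mod=3, reg=0, rm=1), VMMCALL is 0f 01 d9
(mod=3, reg=3, rm=1), while SYSCALL (0f 05), SYSENTER (0f 34) and SYSEXIT
(0f 35) carry no ModRM byte at all, so the decoder leaves those fields zero.
A minimal standalone sketch of the same filter (the helper name
ud_trap_opcode_ok() is made up and not part of this patch):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative restatement of the EMULTYPE_TRAP_UD filter above; the
 * mod/reg/rm arguments are assumed to be zero when no ModRM byte was
 * decoded, mirroring the zero-initialised decode cache.
 */
static bool ud_trap_opcode_ok(bool twobyte, uint8_t b,
			      uint8_t mod, uint8_t reg, uint8_t rm)
{
	if (!twobyte)
		return false;

	switch (b) {
	case 0x01:	/* 0f 01 c1 = VMCALL, 0f 01 d9 = VMMCALL */
		if (mod != 3 || rm != 1)
			return false;
		break;
	case 0x05:	/* 0f 05 = SYSCALL */
	case 0x34:	/* 0f 34 = SYSENTER */
	case 0x35:	/* 0f 35 = SYSEXIT */
		if (mod != 0 || rm != 0)
			return false;
		break;
	default:
		return false;
	}

	return reg == 0 || reg == 3;
}
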
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <asm/kvm_x86_emulate.h>
+#include "mmu.h"
+
/*
* Opcode effective-address decode tables.
* Note that we only emulate instructions that have at least one memory
@@ -217,7 +219,9 @@ static u32 twobyte_table[256] = {
ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
- ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ImplicitOps, 0, ImplicitOps, 0,
+ ImplicitOps, ImplicitOps, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -320,8 +324,11 @@ static u32 group2_table[] = {
};
/* EFLAGS bit definitions. */
+#define EFLG_VM (1<<17)
+#define EFLG_RF (1<<16)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
+#define EFLG_IF (1<<9)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
@@ -1985,10 +1992,114 @@ twobyte_insn:
goto cannot_emulate;
}
break;
+ case 0x05: { /* syscall */
+ unsigned long cr0 = ctxt->vcpu->arch.cr0;
+ struct kvm_segment cs, ss;
+
+ memset(&cs, 0, sizeof(struct kvm_segment));
+ memset(&ss, 0, sizeof(struct kvm_segment));
+
+ /* inject #UD if
+ * 1. we are in real mode
+ * 2. protected mode is not enabled
+ * 3. LOCK prefix is used
+ */
+ if ((ctxt->mode == X86EMUL_MODE_REAL)
+ || (!(cr0 & X86_CR0_PE))
+ || (c->lock_prefix)) {
+ /* No need to inject the #UD here: when emulate_instruction()
+ * returns something other than EMULATE_DONE,
+ * svm.c:ud_interception() will inject it for us.
+ */
+ goto cannot_emulate;
+ }
+
+ /* inject #UD if syscall/sysret are disabled. */
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_K6_EFER, &msr_data);
+ if ((msr_data & EFER_SCE) == 0) {
+ /* again: ud_interception() will inject the #UD for us */
+ goto cannot_emulate;
+ }
+
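+ /* AMD: STAR[47:32] holds the SYSCALL CS selector and SS is
+ * loaded with that selector + 8; STAR[31:0] is the legacy
+ * mode entry point used further down.
+ */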
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ msr_data >>= 32;
+
+ cs.selector = (u16)(msr_data & 0xfffc);
+ cs.l = 0; /* will be adjusted later */
+ cs.base = 0; /* flat segment */
+ cs.g = 1; /* 4kb granularity */
+ cs.limit = 0xfffff; /* 4GB limit */
+ cs.type = 0x0b; /* Read, Execute, Accessed */
+ cs.dpl = 0; /* will be adjusted later */
+ cs.present = 1;
+ cs.s = 1;
+ cs.db = 1;
+
+ ss.unusable = 0;
+ ss.selector = (u16)(msr_data + 8);
+ ss.base = 0;
+ ss.type = 0x03; /* Read/Write, Expand-Up, Accessed */
+ ss.present = 1;
+ ss.s = 1;
+ ss.db = 1;
+
+ if (is_long_mode(ctxt->vcpu)) {
+
+ cs.db = 0;
+ cs.l = 1; /* long mode */
+
+ c->regs[VCPU_REGS_RCX] = c->eip;
+ c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_PROT64:
+ /* Set the g bit and a 4GB limit so the effective
+ * limit covers the whole 32-bit address space.
+ */
+ cs.g = 1;
+ cs.limit = 0xffffffff;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_LSTAR, &msr_data);
+ break;
+ case X86EMUL_MODE_PROT32:
+ /* compat mode */
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_CSTAR, &msr_data);
+ break;
+ }
+
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_SYSCALL_MASK, &msr_data);
+ ctxt->eflags &= ~(msr_data | EFLG_RF);
+ } else {
+ /* legacy mode */
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ c->regs[VCPU_REGS_RCX] = c->eip;
+ c->eip = (u32)msr_data;
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+ }
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ goto writeback;
+ break;
+ }
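
Purely as a reader aid (not part of the patch; the struct and helper names
are made up), the MSR layout the syscall case above relies on boils down to:

#include <stdint.h>

struct syscall_msrs {
	uint64_t star;   /* [63:48] SYSRET sel base, [47:32] SYSCALL sel base, [31:0] legacy EIP */
	uint64_t lstar;  /* 64-bit mode SYSCALL target RIP */
	uint64_t cstar;  /* compatibility mode SYSCALL target RIP */
	uint64_t sfmask; /* RFLAGS bits cleared on SYSCALL in long mode */
};

/* Mirrors the selector and entry-point maths of the emulation above. */
static void syscall_targets(const struct syscall_msrs *m, int long_mode,
			    int compat, uint16_t *cs_sel, uint16_t *ss_sel,
			    uint64_t *rip)
{
	uint16_t base = (uint16_t)(m->star >> 32) & 0xfffc;

	*cs_sel = base;
	*ss_sel = base + 8;

	if (long_mode)
		*rip = compat ? m->cstar : m->lstar;
	else
		*rip = (uint32_t)m->star;
}
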
case 0x06:
emulate_clts(ctxt->vcpu);
c->dst.type = OP_NONE;
break;
+ case 0x07: /* sysret */
case 0x08: /* invd */
case 0x09: /* wbinvd */
case 0x0d: /* GrpP (prefetch) */
@@ -2051,6 +2162,242 @@ twobyte_insn:
rc = X86EMUL_CONTINUE;
c->dst.type = OP_NONE;
break;
+ case 0x34: { /* sysenter */
+ /* Intel manual Vol 2b */
+ unsigned long cr0 = ctxt->vcpu->arch.cr0;
+ struct kvm_segment cs, ss;
+
+ memset(&cs, 0, sizeof(struct kvm_segment));
+ memset(&ss, 0, sizeof(struct kvm_segment));
+
+ /* XXX sysenter/sysexit have not been tested in 64-bit mode,
+ * so bail out and let a #UD be injected there for now.
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64) {
+ /* as in the syscall case: ud_interception() will
+ * inject the #UD for us.
+ */
+ goto cannot_emulate;
+ }
+
+ if ((ctxt->mode == X86EMUL_MODE_REAL) ||
+ (!(cr0 & X86_CR0_PE))) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ }
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix) {
+ /* again: ud_interception() will inject the #UD for us */
+ goto cannot_emulate;
+ }
+
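+ /* Intel: SYSENTER loads CS from IA32_SYSENTER_CS and SS
+ * implicitly from IA32_SYSENTER_CS + 8 (set up below).
+ */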
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_IA32_SYSENTER_CS, &msr_data);
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_PROT32:
+ if ((msr_data & 0xfffc) != 0x0)
+ break;
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ case X86EMUL_MODE_PROT64:
+ if (msr_data != 0x0)
+ break;
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ }
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+
+ kvm_x86_ops->get_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+
+ cs.selector = (u16)msr_data;
+ cs.base = 0; /* flat segment */
+ cs.limit = 0xfffff; /* 4GB limit */
+ cs.g = 1; /* 4kb granularity */
+ cs.s = 1;
+ cs.type = 0x0b; /* Execute + Read, Accessed */
+ cs.db = 1; /* 32bit code segment */
+ cs.dpl = 0;
+ cs.selector &= ~SELECTOR_RPL_MASK;
+ cs.present = 1;
+
+ /* No need to set the cpl explicitly here; set_segment()
+ * does this below based on the cs.dpl value.
+ */
+
+ ss.unusable = 0;
+ ss.selector = cs.selector + 8;
+ ss.base = 0; /* flat segment */
+ ss.limit = 0xfffff; /* 4GB limit */
+ ss.g = 1; /* 4kb granularity */
+ ss.s = 1;
+ ss.type = 0x03; /* Read/Write, Accessed */
+ ss.db = 1; /* 32bit stack segment */
+ ss.dpl = 0;
+ ss.selector &= ~SELECTOR_RPL_MASK;
+ ss.present = 1;
+
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_PROT32:
+ if (!is_long_mode(ctxt->vcpu))
+ break;
+ /* fallthrough */
+ case X86EMUL_MODE_PROT64:
+ cs.base = 0;
+ cs.db = 0;
+ cs.l = 1;
+ cs.limit = 0xffffffff;
+ ss.base = 0;
+ ss.limit = 0xffffffff;
+ break;
+ default:
+ break;
+ }
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_IA32_SYSENTER_EIP, &msr_data);
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_IA32_SYSENTER_ESP, &msr_data);
+ c->regs[VCPU_REGS_RSP] = msr_data;
+
+ goto writeback;
+ break;
+ }
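
Again purely as an illustration (hypothetical names, not part of the patch),
the SYSENTER MSR triple consumed above amounts to:

#include <stdint.h>

struct sysenter_msrs {
	uint64_t cs;   /* IA32_SYSENTER_CS: ring-0 CS selector, SS = CS + 8 */
	uint64_t eip;  /* IA32_SYSENTER_EIP: ring-0 entry point */
	uint64_t esp;  /* IA32_SYSENTER_ESP: ring-0 stack pointer */
};

/* Mirrors the register loads performed by the sysenter case above. */
static void sysenter_targets(const struct sysenter_msrs *m,
			     uint16_t *cs_sel, uint16_t *ss_sel,
			     uint64_t *rip, uint64_t *rsp)
{
	*cs_sel = (uint16_t)m->cs & ~3;	/* RPL forced to 0 */
	*ss_sel = *cs_sel + 8;
	*rip = m->eip;
	*rsp = m->esp;
}
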
+ case 0x35: { /* sysexit */
+ /* Intel manual Vol 2b */
+ unsigned long cr0 = ctxt->vcpu->arch.cr0;
+ struct kvm_segment cs, ss;
+ int usermode;
+
+ memset(&cs, 0, sizeof(struct kvm_segment));
+ memset(&ss, 0, sizeof(struct kvm_segment));
+
+ if ((ctxt->mode == X86EMUL_MODE_REAL)
+ || (!(cr0 & X86_CR0_PE))
+ || (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ }
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix) {
+ /* again: ud_interception() will inject the #UD for us */
+ goto cannot_emulate;
+ }
+
+ /* TODO: check whether rip and rsp are canonical and
+ * inject a #GP if they are not.
+ */
+
+ /* if REX.W bit is set ... */
+ if ((c->rex_prefix & 0x8) != 0x0) {
+ /* Application is in 64bit mode */
+ usermode = X86EMUL_MODE_PROT64;
+ } else {
+ /* Application is in 32bit legacy/compat mode */
+ usermode = X86EMUL_MODE_PROT32;
+ }
+
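+ /* Intel: SYSEXIT derives the user selectors from
+ * IA32_SYSENTER_CS: CS = +16 and SS = +24 for a 32-bit
+ * return, CS = +32 and SS = +40 (= CS + 8) for a 64-bit
+ * return; RIP comes from RDX and RSP from RCX.
+ */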
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ MSR_IA32_SYSENTER_CS, &msr_data);
+ kvm_x86_ops->get_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ switch (usermode) {
+ case X86EMUL_MODE_PROT32:
+ cs.selector = (u16)(msr_data + 16);
+ if ((msr_data & 0xfffc) != 0x0)
+ break;
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ case X86EMUL_MODE_PROT64:
+ cs.selector = (u16)(msr_data + 32);
+ if (msr_data != 0x0)
+ break;
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto cannot_emulate;
+ }
+
+ cs.base = 0; /* flat segment */
+ cs.limit = 0xfffff; /* 4GB limit */
+ cs.g = 1; /* 4kb granularity */
+ cs.s = 1;
+ cs.type = 0x0b; /* Execute, Read, Non-conforming code */
+ cs.db = 1; /* 32bit code segment */
+ cs.dpl = 3;
+ cs.selector |= SELECTOR_RPL_MASK;
+ cs.present = 1;
+ cs.l = 0; /* For return to compatibility mode */
+
+ /* No need to set the cpl explicitly here; set_segment()
+ * does this below based on the cs.dpl value.
+ */
+
+ switch (usermode) {
+ case X86EMUL_MODE_PROT32:
+ ss.selector = (u16)(msr_data + 24);
+ break;
+ case X86EMUL_MODE_PROT64:
+ ss.selector = (cs.selector + 8);
+ break;
+ }
+ ss.base = 0; /* flat segment */
+ ss.limit = 0xfffff; /* 4GB limit */
+ ss.g = 1; /* 4kb granularity */
+ ss.s = 1;
+ ss.type = 0x03; /* Expand Up, Read/Write, Data */
+ ss.db = 1; /* 32bit stack segment */
+ ss.dpl = 3;
+ ss.selector |= SELECTOR_RPL_MASK;
+ ss.present = 1;
+
+ switch (usermode) {
+ case X86EMUL_MODE_PROT32:
+ /* We don't rely on the cs.g/ss.g bits (4kb
+ * granularity), so set the effective limit here;
+ * otherwise the guest takes a #GP.
+ */
+ cs.limit = 0xffffffff;
+ ss.limit = 0xffffffff;
+ break;
+ case X86EMUL_MODE_PROT64:
+ /* We don't rely on the cs.g/ss.g bits (4kb
+ * granularity), so set the effective limit here;
+ * otherwise the guest takes a #GP.
+ */
+ cs.base = 0;
+ cs.db = 0;
+ cs.l = 1;
+ cs.limit = 0xffffffff;
+ ss.base = 0;
+ ss.limit = 0xffffffff;
+ break;
+ default:
+ break;
+ }
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
+ c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
+
+ goto writeback;
+ break;
+ }
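
One more illustration (not part of the patch; the struct and helper are made
up): how the kvm_segment fields filled in throughout these cases (type 0x0b =
accessed execute/read code, type 0x03 = accessed read/write data, plus s, dpl,
present, l, db and g) pack into a single access-rights word, roughly in the
layout KVM's VMX code uses for the VMCS segment access-rights fields:

#include <stdint.h>

/* Simplified subset of the segment fields this patch fills in. */
struct seg_attribs {
	uint8_t type, s, dpl, present, l, db, g;
};

/* Packs the fields into the access-rights layout:
 * type in bits 3:0, S bit 4, DPL bits 6:5, P bit 7,
 * L bit 13, D/B bit 14, G bit 15.
 */
static uint32_t seg_access_rights(const struct seg_attribs *s)
{
	return (s->type & 0xf) |
	       ((uint32_t)s->s << 4) |
	       ((uint32_t)(s->dpl & 3) << 5) |
	       ((uint32_t)s->present << 7) |
	       ((uint32_t)s->l << 13) |
	       ((uint32_t)s->db << 14) |
	       ((uint32_t)s->g << 15);
}
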
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
if (!test_cc(c->b, ctxt->eflags))