
[v3] SVM: use generic instruction decoding

Message ID 57EE4EBC0200007800114146@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich Sept. 30, 2016, 9:38 a.m. UTC
... instead of custom handling. To facilitate this, break out init code
from _hvm_emulate_one() into the new hvm_emulate_init(), and make
hvmemul_insn_fetch() globally available.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v3: Add comment explaining debug/non-debug build differences. Use
    unsigned int for opc_tab[] index variable.
v2: Add comment to caller field. Rename REG_POISON to PTR_POISON. Align
    opc_tab[] initializer lines.
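
For reference, a sketch of the decode-only call sequence the new
interfaces enable (this is exactly what the rewritten
__get_instruction_length_from_list() below does, with error handling
trimmed):

    struct hvm_emulate_ctxt ctxt;
    struct x86_emulate_state *state;
    unsigned int inst_len, rm, reg;
    int mod;

    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
    hvm_emulate_init(&ctxt, NULL, 0);       /* no pre-fetched bytes */
    state = x86_decode_insn(&ctxt.ctxt, hvmemul_insn_fetch);
    if ( IS_ERR_OR_NULL(state) )
        return 0;

    inst_len = x86_insn_length(state, &ctxt.ctxt);
    mod = x86_insn_modrm(state, &rm, &reg); /* -EINVAL if no ModRM */
    x86_emulate_free_state(state);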

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -835,7 +835,7 @@ static int hvmemul_read(
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
 }
 
-static int hvmemul_insn_fetch(
+int hvmemul_insn_fetch(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
@@ -1766,15 +1766,14 @@ static const struct x86_emulate_ops hvm_
     .vmfunc        = hvmemul_vmfunc,
 };
 
-static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
-    const struct x86_emulate_ops *ops)
+void hvm_emulate_init(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const unsigned char *insn_buf,
+    unsigned int insn_bytes)
 {
-    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
-    uint32_t new_intr_shadow, pfec = PFEC_page_present;
-    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    unsigned int pfec = PFEC_page_present;
     unsigned long addr;
-    int rc;
 
     if ( hvm_long_mode_enabled(curr) &&
          hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.l )
@@ -1792,14 +1791,14 @@ static int _hvm_emulate_one(struct hvm_e
     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    hvmemul_ctxt->insn_buf_eip = regs->eip;
-    if ( !vio->mmio_insn_bytes )
+    hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->eip;
+    if ( !insn_bytes )
     {
         hvmemul_ctxt->insn_buf_bytes =
             hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
             (hvm_virtual_to_linear_addr(x86_seg_cs,
                                         &hvmemul_ctxt->seg_reg[x86_seg_cs],
-                                        regs->eip,
+                                        hvmemul_ctxt->insn_buf_eip,
                                         sizeof(hvmemul_ctxt->insn_buf),
                                         hvm_access_insn_fetch,
                                         hvmemul_ctxt->ctxt.addr_size,
@@ -1811,11 +1810,24 @@ static int _hvm_emulate_one(struct hvm_e
     }
     else
     {
-        hvmemul_ctxt->insn_buf_bytes = vio->mmio_insn_bytes;
-        memcpy(hvmemul_ctxt->insn_buf, vio->mmio_insn, vio->mmio_insn_bytes);
+        hvmemul_ctxt->insn_buf_bytes = insn_bytes;
+        memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
     }
 
     hvmemul_ctxt->exn_pending = 0;
+}
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+    struct vcpu *curr = current;
+    uint32_t new_intr_shadow;
+    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+    int rc;
+
+    hvm_emulate_init(hvmemul_ctxt, vio->mmio_insn, vio->mmio_insn_bytes);
+
     vio->mmio_retry = 0;
 
     if ( cpu_has_vmx )
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -15,7 +15,7 @@
  * this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <xen/config.h>
+#include <xen/err.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/trace.h>
@@ -26,41 +26,6 @@
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/hvm/svm/emulate.h>
 
-static unsigned int is_prefix(u8 opc)
-{
-    switch ( opc )
-    {
-    case 0x66:
-    case 0x67:
-    case 0x2E:
-    case 0x3E:
-    case 0x26:
-    case 0x64:
-    case 0x65:
-    case 0x36:
-    case 0xF0:
-    case 0xF3:
-    case 0xF2:
-    case 0x40 ... 0x4f:
-        return 1;
-    }
-    return 0;
-}
-
-static unsigned long svm_rip2pointer(struct vcpu *v, unsigned long *limit)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long p = vmcb->cs.base + vmcb->rip;
-
-    if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) )
-    {
-        *limit = vmcb->cs.limit;
-        return (u32)p; /* mask to 32 bits */
-    }
-    *limit = ~0UL;
-    return p;
-}
-
 static unsigned long svm_nextrip_insn_length(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -89,141 +54,104 @@ static unsigned long svm_nextrip_insn_le
     return vmcb->nextrip - vmcb->rip;
 }
 
-/* First byte: Length. Following bytes: Opcode bytes. */
-#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
-MAKE_INSTR(INVD,   2, 0x0f, 0x08);
-MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
-MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
-MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
-MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
-MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
-MAKE_INSTR(HLT,    1, 0xf4);
-MAKE_INSTR(INT3,   1, 0xcc);
-MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
-MAKE_INSTR(PAUSE,  1, 0x90);
-MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
-MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
-MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
-MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
-MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
-MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
-MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
-
-static const u8 *const opc_bytes[INSTR_MAX_COUNT] =
-{
-    [INSTR_INVD]   = OPCODE_INVD,
-    [INSTR_WBINVD] = OPCODE_WBINVD,
-    [INSTR_CPUID]  = OPCODE_CPUID,
-    [INSTR_RDMSR]  = OPCODE_RDMSR,
-    [INSTR_WRMSR]  = OPCODE_WRMSR,
-    [INSTR_VMCALL] = OPCODE_VMCALL,
-    [INSTR_HLT]    = OPCODE_HLT,
-    [INSTR_INT3]   = OPCODE_INT3,
-    [INSTR_RDTSC]  = OPCODE_RDTSC,
-    [INSTR_PAUSE]  = OPCODE_PAUSE,
-    [INSTR_XSETBV] = OPCODE_XSETBV,
-    [INSTR_VMRUN]  = OPCODE_VMRUN,
-    [INSTR_VMLOAD] = OPCODE_VMLOAD,
-    [INSTR_VMSAVE] = OPCODE_VMSAVE,
-    [INSTR_STGI]   = OPCODE_STGI,
-    [INSTR_CLGI]   = OPCODE_CLGI,
-    [INSTR_INVLPGA] = OPCODE_INVLPGA,
+static const struct {
+    unsigned int opcode;
+    struct {
+        unsigned int rm:3;
+        unsigned int reg:3;
+        unsigned int mod:2;
+#define MODRM(mod, reg, rm) { rm, reg, mod }
+    } modrm;
+} const opc_tab[INSTR_MAX_COUNT] = {
+    [INSTR_PAUSE]   = { X86EMUL_OPC_F3(0, 0x90) },
+    [INSTR_INT3]    = { X86EMUL_OPC(   0, 0xcc) },
+    [INSTR_HLT]     = { X86EMUL_OPC(   0, 0xf4) },
+    [INSTR_XSETBV]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 2, 1) },
+    [INSTR_VMRUN]   = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 0) },
+    [INSTR_VMCALL]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 1) },
+    [INSTR_VMLOAD]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 2) },
+    [INSTR_VMSAVE]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 3) },
+    [INSTR_STGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 4) },
+    [INSTR_CLGI]    = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 5) },
+    [INSTR_INVLPGA] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 7) },
+    [INSTR_INVD]    = { X86EMUL_OPC(0x0f, 0x08) },
+    [INSTR_WBINVD]  = { X86EMUL_OPC(0x0f, 0x09) },
+    [INSTR_WRMSR]   = { X86EMUL_OPC(0x0f, 0x30) },
+    [INSTR_RDTSC]   = { X86EMUL_OPC(0x0f, 0x31) },
+    [INSTR_RDMSR]   = { X86EMUL_OPC(0x0f, 0x32) },
+    [INSTR_CPUID]   = { X86EMUL_OPC(0x0f, 0xa2) },
 };
 
-static bool_t fetch(const struct vmcb_struct *vmcb, u8 *buf,
-                    unsigned long addr, unsigned int len)
-{
-    uint32_t pfec = (vmcb_get_cpl(vmcb) == 3) ? PFEC_user_mode : 0;
-
-    switch ( hvm_fetch_from_guest_virt(buf, addr, len, pfec) )
-    {
-    case HVMCOPY_okay:
-        break;
-    case HVMCOPY_bad_gva_to_gfn:
-        /* OK just to give up; we'll have injected #PF already */
-        return 0;
-    default:
-        /* Not OK: fetches from non-RAM pages are not supportable. */
-        gdprintk(XENLOG_WARNING, "Bad instruction fetch at %#lx (%#lx)\n",
-                 vmcb->rip, addr);
-        hvm_inject_hw_exception(TRAP_gp_fault, 0);
-        return 0;
-    }
-    return 1;
-}
-
 int __get_instruction_length_from_list(struct vcpu *v,
         const enum instruction_index *list, unsigned int list_count)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned int i, j, inst_len = 0;
-    enum instruction_index instr = 0;
-    u8 buf[MAX_INST_LEN];
-    const u8 *opcode = NULL;
-    unsigned long fetch_addr, fetch_limit;
-    unsigned int fetch_len, max_len;
-
+    struct hvm_emulate_ctxt ctxt;
+    struct x86_emulate_state *state;
+    unsigned int inst_len, j, modrm_rm, modrm_reg;
+    int modrm_mod;
+
+    /*
+     * In debug builds, always use x86_decode_insn() and compare with
+     * hardware.
+     */
+#ifdef NDEBUG
     if ( (inst_len = svm_nextrip_insn_length(v)) != 0 )
         return inst_len;
 
     if ( vmcb->exitcode == VMEXIT_IOIO )
         return vmcb->exitinfo2 - vmcb->rip;
+#endif
 
-    /* Fetch up to the next page break; we'll fetch from the next page
-     * later if we have to. */
-    fetch_addr = svm_rip2pointer(v, &fetch_limit);
-    if ( vmcb->rip > fetch_limit )
-        return 0;
-    max_len = min(fetch_limit - vmcb->rip + 1, MAX_INST_LEN + 0UL);
-    fetch_len = min_t(unsigned int, max_len,
-                      PAGE_SIZE - (fetch_addr & ~PAGE_MASK));
-    if ( !fetch(vmcb, buf, fetch_addr, fetch_len) )
+    ASSERT(v == current);
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+    hvm_emulate_init(&ctxt, NULL, 0);
+    state = x86_decode_insn(&ctxt.ctxt, hvmemul_insn_fetch);
+    if ( IS_ERR_OR_NULL(state) )
         return 0;
 
-    while ( (inst_len < max_len) && is_prefix(buf[inst_len]) )
-    {
-        inst_len++;
-        if ( inst_len >= fetch_len )
-        {
-            if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                        max_len - fetch_len) )
-                return 0;
-            fetch_len = max_len;
-        }
+    inst_len = x86_insn_length(state, &ctxt.ctxt);
+    modrm_mod = x86_insn_modrm(state, &modrm_rm, &modrm_reg);
+    x86_emulate_free_state(state);
+#ifndef NDEBUG
+    if ( vmcb->exitcode == VMEXIT_IOIO )
+        j = vmcb->exitinfo2 - vmcb->rip;
+    else
+        j = svm_nextrip_insn_length(v);
+    if ( j && j != inst_len )
+    {
+        gprintk(XENLOG_WARNING, "insn-len[%02x]=%u (exp %u)\n",
+                ctxt.ctxt.opcode, inst_len, j);
+        return j;
     }
+#endif
 
     for ( j = 0; j < list_count; j++ )
     {
-        instr = list[j];
-        opcode = opc_bytes[instr];
+        unsigned int instr = list[j];
 
-        for ( i = 0; (i < opcode[0]) && ((inst_len + i) < max_len); i++ )
+        if ( instr >= ARRAY_SIZE(opc_tab) )
+        {
+            ASSERT_UNREACHABLE();
+            break;
+        }
+        if ( opc_tab[instr].opcode == ctxt.ctxt.opcode )
         {
-            if ( (inst_len + i) >= fetch_len ) 
-            {
-                if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
-                            max_len - fetch_len) )
-                    return 0;
-                fetch_len = max_len;
-            }
+            if ( !opc_tab[instr].modrm.mod )
+                return inst_len;
 
-            if ( buf[inst_len+i] != opcode[i+1] )
-                goto mismatch;
+            if ( modrm_mod == opc_tab[instr].modrm.mod &&
+                 (modrm_rm & 7) == opc_tab[instr].modrm.rm &&
+                 (modrm_reg & 7) == opc_tab[instr].modrm.reg )
+                return inst_len;
         }
-        goto done;
-    mismatch: ;
     }
 
     gdprintk(XENLOG_WARNING,
-             "%s: Mismatch between expected and actual instruction bytes: "
+             "%s: Mismatch between expected and actual instruction: "
              "eip = %lx\n",  __func__, (unsigned long)vmcb->rip);
     hvm_inject_hw_exception(TRAP_gp_fault, 0);
     return 0;
-
- done:
-    inst_len += opcode[0];
-    ASSERT(inst_len <= max_len);
-    return inst_len;
 }
 
 /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -382,9 +382,9 @@ struct operand {
     } mem;
 };
 #ifdef __x86_64__
-#define REG_POISON ((unsigned long *) 0x8086000000008086UL) /* non-canonical */
+#define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #else
-#define REG_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
+#define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
 #endif
 
 typedef union {
@@ -1646,6 +1646,14 @@ struct x86_emulate_state {
 
     unsigned long eip;
     struct cpu_user_regs *regs;
+
+#ifndef NDEBUG
+    /*
+     * Track caller of x86_decode_insn() to spot missing as well as
+     * premature calls to x86_emulate_free_state().
+     */
+    void *caller;
+#endif
 };
 
 /* Helper definitions. */
@@ -1673,6 +1681,11 @@ x86_decode_onebyte(
 
     switch ( ctxt->opcode )
     {
+    case 0x90: /* nop / pause */
+        if ( repe_prefix() )
+            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
+        break;
+
     case 0x9a: /* call (far, absolute) */
     case 0xea: /* jmp (far, absolute) */
         generate_exception_if(mode_64bit(), EXC_UD, -1);
@@ -1780,7 +1793,7 @@ x86_decode(
     override_seg = -1;
     ea.type = OP_MEM;
     ea.mem.seg = x86_seg_ds;
-    ea.reg = REG_POISON;
+    ea.reg = PTR_POISON;
     state->regs = ctxt->regs;
     state->eip = ctxt->regs->eip;
 
@@ -2267,8 +2280,8 @@ x86_emulate(
     struct x86_emulate_state state;
     int rc;
     uint8_t b, d;
-    struct operand src = { .reg = REG_POISON };
-    struct operand dst = { .reg = REG_POISON };
+    struct operand src = { .reg = PTR_POISON };
+    struct operand dst = { .reg = PTR_POISON };
     enum x86_swint_type swint_type;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
@@ -2861,8 +2874,9 @@ x86_emulate(
         break;
 
     case 0x90: /* nop / xchg %%r8,%%rax */
+    case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
         if ( !(rex_prefix & 1) )
-            break; /* nop */
+            break; /* nop / pause */
         /* fall through */
 
     case 0x91 ... 0x97: /* xchg reg,%%rax */
@@ -5199,3 +5213,89 @@ x86_emulate(
 #undef vex
 #undef override_seg
 #undef ea
+
+#ifdef __XEN__
+
+#include <xen/err.h>
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt))
+{
+    static DEFINE_PER_CPU(struct x86_emulate_state, state);
+    struct x86_emulate_state *state = &this_cpu(state);
+    const struct x86_emulate_ops ops = {
+        .insn_fetch = insn_fetch,
+        .read       = x86emul_unhandleable_rw,
+        .write      = PTR_POISON,
+        .cmpxchg    = PTR_POISON,
+    };
+    int rc = x86_decode(state, ctxt, &ops);
+
+    if ( unlikely(rc != X86EMUL_OKAY) )
+        return ERR_PTR(-rc);
+
+#ifndef NDEBUG
+    /*
+     * While we avoid memory allocation (by use of per-CPU data) above,
+     * nevertheless make sure callers properly release the state structure
+     * for forward compatibility.
+     */
+    if ( state->caller )
+    {
+        printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
+               state->caller);
+        dump_execution_state();
+    }
+    state->caller = __builtin_return_address(0);
+#endif
+
+    return state;
+}
+
+static inline void check_state(const struct x86_emulate_state *state)
+{
+#ifndef NDEBUG
+    ASSERT(state->caller);
+#endif
+}
+
+#ifndef NDEBUG
+void x86_emulate_free_state(struct x86_emulate_state *state)
+{
+    check_state(state);
+    state->caller = NULL;
+}
+#endif
+
+int
+x86_insn_modrm(const struct x86_emulate_state *state,
+               unsigned int *rm, unsigned int *reg)
+{
+    check_state(state);
+
+    if ( !(state->desc & ModRM) )
+        return -EINVAL;
+
+    if ( rm )
+        *rm = state->modrm_rm;
+    if ( reg )
+        *reg = state->modrm_reg;
+
+    return state->modrm_mod;
+}
+
+unsigned int
+x86_insn_length(const struct x86_emulate_state *state,
+                const struct x86_emulate_ctxt *ctxt)
+{
+    check_state(state);
+
+    return state->eip - ctxt->regs->eip;
+}
+
+#endif
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -532,4 +532,29 @@ x86emul_unhandleable_rw(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt);
 
+#ifdef __XEN__
+
+struct x86_emulate_state *
+x86_decode_insn(
+    struct x86_emulate_ctxt *ctxt,
+    int (*insn_fetch)(
+        enum x86_segment seg, unsigned long offset,
+        void *p_data, unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt));
+
+int
+x86_insn_modrm(const struct x86_emulate_state *state,
+               unsigned int *rm, unsigned int *reg);
+unsigned int
+x86_insn_length(const struct x86_emulate_state *state,
+                const struct x86_emulate_ctxt *ctxt);
+
+#ifdef NDEBUG
+static inline void x86_emulate_free_state(struct x86_emulate_state *state) {}
+#else
+void x86_emulate_free_state(struct x86_emulate_state *state);
+#endif
+
+#endif
+
 #endif /* __X86_EMULATE_H__ */
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -54,6 +54,10 @@ void hvm_mem_access_emulate_one(enum emu
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs);
+void hvm_emulate_init(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const unsigned char *insn_buf,
+    unsigned int insn_bytes);
 void hvm_emulate_writeback(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 struct segment_register *hvmemul_get_seg_reg(
@@ -61,6 +65,11 @@ struct segment_register *hvmemul_get_seg
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla);
 
+int hvmemul_insn_fetch(enum x86_segment seg,
+                       unsigned long offset,
+                       void *p_data,
+                       unsigned int bytes,
+                       struct x86_emulate_ctxt *ctxt);
 int hvmemul_do_pio_buffer(uint16_t port,
                           unsigned int size,
                           uint8_t dir,

Comments

Boris Ostrovsky Sept. 30, 2016, 2:37 p.m. UTC | #1
On 09/30/2016 05:38 AM, Jan Beulich wrote:
> +        if ( opc_tab[instr].opcode == ctxt.ctxt.opcode )
>          {
> -            if ( (inst_len + i) >= fetch_len ) 
> -            {
> -                if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
> -                            max_len - fetch_len) )
> -                    return 0;
> -                fetch_len = max_len;
> -            }
> +            if ( !opc_tab[instr].modrm.mod )
> +                return inst_len;

Is there a reason why this is not folded into the 'if' below?

>  
> -            if ( buf[inst_len+i] != opcode[i+1] )
> -                goto mismatch;
> +            if ( modrm_mod == opc_tab[instr].modrm.mod &&
> +                 (modrm_rm & 7) == opc_tab[instr].modrm.rm &&
> +                 (modrm_reg & 7) == opc_tab[instr].modrm.reg )
> +                return inst_len;
>          }

...

> +
> +int
> +x86_insn_modrm(const struct x86_emulate_state *state,
> +               unsigned int *rm, unsigned int *reg)
> +{
> +    check_state(state);
> +
> +    if ( !(state->desc & ModRM) )
> +        return -EINVAL;
> +
> +    if ( rm )
> +        *rm = state->modrm_rm;
> +    if ( reg )
> +        *reg = state->modrm_reg;
> +
> +    return state->modrm_mod;
> +}

Can this return struct modrm (which would then become visible outside of
svm.c)? And then x86_emulate_state can include the same struct instead
of the three separate fields.


-boris
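
A hypothetical sketch of what Boris is suggesting (neither the struct
nor the accessor below exists in the patch, and the idea was not
adopted; see the REX discussion further down):

    /* Hypothetical one-byte ModRM descriptor: */
    struct x86_modrm { uint8_t rm:3, reg:3, mod:2; };

    /* Assuming opc_tab[] and a new accessor both used it: */
    struct x86_modrm modrm = x86_insn_modrm_struct(state); /* hypothetical */

    if ( opc_tab[instr].opcode == ctxt.ctxt.opcode &&
         (!opc_tab[instr].modrm.mod ||
          !memcmp(&opc_tab[instr].modrm, &modrm, sizeof(modrm))) )
        return inst_len;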
Jan Beulich Sept. 30, 2016, 2:44 p.m. UTC | #2
>>> On 30.09.16 at 16:37, <boris.ostrovsky@oracle.com> wrote:
> On 09/30/2016 05:38 AM, Jan Beulich wrote:
>> +        if ( opc_tab[instr].opcode == ctxt.ctxt.opcode )
>>          {
>> -            if ( (inst_len + i) >= fetch_len ) 
>> -            {
>> -                if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
>> -                            max_len - fetch_len) )
>> -                    return 0;
>> -                fetch_len = max_len;
>> -            }
>> +            if ( !opc_tab[instr].modrm.mod )
>> +                return inst_len;
> 
> Is there a reason why this is not folded into the 'if' below?

To keep things readable.
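
(For illustration only, the folded form would presumably be the single
condition below; whether this reads better is the trade-off at issue:)

    if ( opc_tab[instr].opcode == ctxt.ctxt.opcode &&
         (!opc_tab[instr].modrm.mod ||
          (modrm_mod == opc_tab[instr].modrm.mod &&
           (modrm_rm & 7) == opc_tab[instr].modrm.rm &&
           (modrm_reg & 7) == opc_tab[instr].modrm.reg)) )
        return inst_len;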

>> +int
>> +x86_insn_modrm(const struct x86_emulate_state *state,
>> +               unsigned int *rm, unsigned int *reg)
>> +{
>> +    check_state(state);
>> +
>> +    if ( !(state->desc & ModRM) )
>> +        return -EINVAL;
>> +
>> +    if ( rm )
>> +        *rm = state->modrm_rm;
>> +    if ( reg )
>> +        *reg = state->modrm_reg;
>> +
>> +    return state->modrm_mod;
>> +}
> 
> Can this return struct modrm (which would then become visible outside of
> svm.c)? And then x86_emulate_state can include the same struct instead
> of the three separate fields.

I'd prefer not to, to leave it to callers which parts they actually care
about. No need for them to put the whole structure on stack when
all they want is e.g. mod.

Jan
Andrew Cooper Sept. 30, 2016, 2:46 p.m. UTC | #3
On 30/09/16 15:44, Jan Beulich wrote:
>>> +int
>>> +x86_insn_modrm(const struct x86_emulate_state *state,
>>> +               unsigned int *rm, unsigned int *reg)
>>> +{
>>> +    check_state(state);
>>> +
>>> +    if ( !(state->desc & ModRM) )
>>> +        return -EINVAL;
>>> +
>>> +    if ( rm )
>>> +        *rm = state->modrm_rm;
>>> +    if ( reg )
>>> +        *reg = state->modrm_reg;
>>> +
>>> +    return state->modrm_mod;
>>> +}
>> Can this return struct modrm (which would then become visible outside of
>> svm.c)? And then x86_emulate_state can include the same struct instead
>> of the three separate fields.
> I'd prefer not to, to leave it to callers which parts they actually care
> about. No need for them to put the whole structure on stack when
> all they want is e.g. mod.

The structure is fine in principle, being only a single byte, but what
can't be represented with it is additions from REX prefixes.

I had considered making structures for ModRM/SIB bytes in the past, but
it makes the logic harder rather than easier.

~Andrew
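
Background for Andrew's point, as a sketch: with a REX prefix the reg
and rm fields each gain a fourth bit, so the decoder's modrm_reg /
modrm_rm are really 4-bit register numbers that a 3-bit field cannot
hold. This is also why the SVM comparison code masks with "& 7":

    /* x86-64 register numbers assembled from ModRM plus REX bits: */
    unsigned int reg = ((rex & 0x4 /* REX.R */) << 1) | ((modrm >> 3) & 7);
    unsigned int rm  = ((rex & 0x1 /* REX.B */) << 3) | (modrm & 7);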
Jan Beulich Sept. 30, 2016, 2:53 p.m. UTC | #4
>>> On 30.09.16 at 16:46, <andrew.cooper3@citrix.com> wrote:
> On 30/09/16 15:44, Jan Beulich wrote:
>>>> +int
>>>> +x86_insn_modrm(const struct x86_emulate_state *state,
>>>> +               unsigned int *rm, unsigned int *reg)
>>>> +{
>>>> +    check_state(state);
>>>> +
>>>> +    if ( !(state->desc & ModRM) )
>>>> +        return -EINVAL;
>>>> +
>>>> +    if ( rm )
>>>> +        *rm = state->modrm_rm;
>>>> +    if ( reg )
>>>> +        *reg = state->modrm_reg;
>>>> +
>>>> +    return state->modrm_mod;
>>>> +}
>>> Can this return struct modrm (which would then become visible outside of
>>> svm.c)? And then x86_emulate_state can include the same struct instead
>>> of the three separate fields.
>> I'd prefer not to, to leave it to callers which parts they actually care
>> about. No need for them to put the whole structure on stack when
>> all they want is e.g. mod.
> 
> The structure is fine in principle, being only a single byte, but what
> can't be represented with it is additions from REX prefixes.

Oh, because of this I didn't even read it as returning an existing
structure type, but introducing a new one.

Jan
Boris Ostrovsky Sept. 30, 2016, 2:54 p.m. UTC | #5
On 09/30/2016 10:44 AM, Jan Beulich wrote:
>
>>> +int
>>> +x86_insn_modrm(const struct x86_emulate_state *state,
>>> +               unsigned int *rm, unsigned int *reg)
>>> +{
>>> +    check_state(state);
>>> +
>>> +    if ( !(state->desc & ModRM) )
>>> +        return -EINVAL;
>>> +
>>> +    if ( rm )
>>> +        *rm = state->modrm_rm;
>>> +    if ( reg )
>>> +        *reg = state->modrm_reg;
>>> +
>>> +    return state->modrm_mod;
>>> +}
>> Can this return struct modrm (which would then become visible outside of
>> svm.c)? And then x86_emulate_state can include the same struct instead
>> of the three separate fields.
> I'd prefer not to, to leave it to callers which parts they actually care
> about. No need for them to put the whole structure on stack when
> all they want is e.g. mod.

But isn't the whole struct one byte long so you'd not be increasing
amount of data on stack? This will also make comparison at least in
__get_instruction_length_from_list() (and possibly other places) simpler.

-boris
Jan Beulich Sept. 30, 2016, 2:56 p.m. UTC | #6
>>> On 30.09.16 at 16:54, <boris.ostrovsky@oracle.com> wrote:
> On 09/30/2016 10:44 AM, Jan Beulich wrote:
>>
>>>> +int
>>>> +x86_insn_modrm(const struct x86_emulate_state *state,
>>>> +               unsigned int *rm, unsigned int *reg)
>>>> +{
>>>> +    check_state(state);
>>>> +
>>>> +    if ( !(state->desc & ModRM) )
>>>> +        return -EINVAL;
>>>> +
>>>> +    if ( rm )
>>>> +        *rm = state->modrm_rm;
>>>> +    if ( reg )
>>>> +        *reg = state->modrm_reg;
>>>> +
>>>> +    return state->modrm_mod;
>>>> +}
>>> Can this return struct modrm (which would then become visible outside of
>>> svm.c)? And then x86_emulate_state can include the same struct instead
>>> of the three separate fields.
>> I'd prefer not to, to leave it to callers which parts they actually care
>> about. No need for them to put the whole structure on stack when
>> all they want is e.g. mod.
> 
> But isn't the whole struct one byte long so you'd not be increasing
> amount of data on stack? This will also make comparison at least in
> __get_instruction_length_from_list() (and possibly other places) simpler.

See the other reply (as well as Andrew's): We'd be making available
incomplete information if we did it that way.

Jan
Boris Ostrovsky Sept. 30, 2016, 3:03 p.m. UTC | #7
On 09/30/2016 10:56 AM, Jan Beulich wrote:
>>>> On 30.09.16 at 16:54, <boris.ostrovsky@oracle.com> wrote:
>> On 09/30/2016 10:44 AM, Jan Beulich wrote:
>>>>> +int
>>>>> +x86_insn_modrm(const struct x86_emulate_state *state,
>>>>> +               unsigned int *rm, unsigned int *reg)
>>>>> +{
>>>>> +    check_state(state);
>>>>> +
>>>>> +    if ( !(state->desc & ModRM) )
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if ( rm )
>>>>> +        *rm = state->modrm_rm;
>>>>> +    if ( reg )
>>>>> +        *reg = state->modrm_reg;
>>>>> +
>>>>> +    return state->modrm_mod;
>>>>> +}
>>>> Can this return struct modrm (which would then become visible outside of
>>>> svm.c)? And then x86_emulate_state can include the same struct instead
>>>> of the three separate fields.
>>> I'd prefer not to, to leave it to callers which parts they actually care
>>> about. No need for them to put the whole structure on stack when
>>> all they want is e.g. mod.
>> But isn't the whole struct one byte long so you'd not be increasing
>> amount of data on stack? This will also make comparison at least in
>> __get_instruction_length_from_list() (and possibly other places) simpler.
> See the other reply (as well as Andrew's): We'd be making available
> incomplete information if we did it that way.


Yes, I saw them after I sent my reply.

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>