
[v2,17/17] x86/HVM: eliminate custom #MF/#XM handling

Message ID 59BABAC1020000780017B3FF@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich Sept. 14, 2017, 3:22 p.m. UTC
Use the generic stub exception handling instead.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base.

Comments

Paul Durrant Sept. 18, 2017, 9:27 a.m. UTC | #1
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 14 September 2017 16:22
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>; George Dunlap <George.Dunlap@citrix.com>
> Subject: [PATCH v2 17/17] x86/HVM: eliminate custom #MF/#XM handling
> 
> Use the generic stub exception handling instead.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

The changes look ok to me, but I don't really have any context as to why separate exception handling might have been desirable in the first place.

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> v2: Re-base.
> 
> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -134,8 +134,6 @@ int emul_test_read_xcr(
>  }
> 
>  int emul_test_get_fpu(
> -    void (*exception_callback)(void *, struct cpu_user_regs *),
> -    void *exception_callback_arg,
>      enum x86_emulate_fpu_type type,
>      struct x86_emulate_ctxt *ctxt)
>  {
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -221,8 +221,6 @@ int emul_test_read_xcr(
>      struct x86_emulate_ctxt *ctxt);
> 
>  int emul_test_get_fpu(
> -    void (*exception_callback)(void *, struct cpu_user_regs *),
> -    void *exception_callback_arg,
>      enum x86_emulate_fpu_type type,
>      struct x86_emulate_ctxt *ctxt);
> 
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1739,8 +1739,6 @@ int hvmemul_cpuid(uint32_t leaf, uint32_
>  }
> 
>  static int hvmemul_get_fpu(
> -    void (*exception_callback)(void *, struct cpu_user_regs *),
> -    void *exception_callback_arg,
>      enum x86_emulate_fpu_type type,
>      struct x86_emulate_ctxt *ctxt)
>  {
> @@ -1778,9 +1776,6 @@ static int hvmemul_get_fpu(
>          }
>      }
> 
> -    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
> -    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
> -
>      return X86EMUL_OKAY;
>  }
> 
> @@ -1791,8 +1786,6 @@ static void hvmemul_put_fpu(
>  {
>      struct vcpu *curr = current;
> 
> -    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
> -
>      if ( aux )
>      {
>          typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -703,7 +703,6 @@ void do_reserved_trap(struct cpu_user_re
> 
>  void do_trap(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *curr = current;
>      unsigned int trapnr = regs->entry_vector;
>      unsigned long fixup;
> 
> @@ -723,15 +722,6 @@ void do_trap(struct cpu_user_regs *regs)
>          return;
>      }
> 
> -    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
> -         system_state >= SYS_STATE_active && is_hvm_vcpu(curr) &&
> -         curr->arch.hvm_vcpu.fpu_exception_callback )
> -    {
> -        curr->arch.hvm_vcpu.fpu_exception_callback(
> -            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
> -        return;
> -    }
> -
>      if ( likely((fixup = search_exception_table(regs)) != 0) )
>      {
>          dprintk(XENLOG_ERR, "Trap %u: %p [%ps] -> %p\n",
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -959,6 +959,33 @@ static inline int mkec(uint8_t e, int32_
>  #define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
> 
>  #ifdef __XEN__
> +static int exception_from_stub(union stub_exception_token res,
> +                               void *stub, unsigned int line,
> +                               struct x86_emulate_ctxt *ctxt,
> +                               const struct x86_emulate_ops *ops)
> +{
> +    int rc = X86EMUL_UNHANDLEABLE;
> +
> +    generate_exception_if(res.fields.trapnr == EXC_MF, EXC_MF);
> +    if ( res.fields.trapnr == EXC_XM )
> +    {
> +        unsigned long cr4;
> +
> +        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
> +            cr4 = X86_CR4_OSXMMEXCPT;
> +        generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? EXC_XM : EXC_UD);
> +    }
> +    gprintk(XENLOG_WARNING,
> +            "exception %u (ec=%04x) in emulation stub (line %u)\n",
> +            res.fields.trapnr, res.fields.ec, line);
> +    gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n", stub);
> +    generate_exception_if(res.fields.trapnr == EXC_UD, EXC_UD);
> +    domain_crash(current->domain);
> +
> + done:
> +    return rc;
> +}
> +
>  # define invoke_stub(pre, post, constraints...) do {                    \
>      union stub_exception_token res_ = { .raw = ~0 };                    \
>      asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"                \
> @@ -974,14 +1001,8 @@ static inline int mkec(uint8_t e, int32_
>                       "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
>      if ( unlikely(~res_.raw) )                                          \
>      {                                                                   \
> -        gprintk(XENLOG_WARNING,                                         \
> -                "exception %u (ec=%04x) in emulation stub (line %u)\n", \
> -                res_.fields.trapnr, res_.fields.ec, __LINE__);          \
> -        gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  \
> -                stub.func);                                             \
> -        generate_exception_if(res_.fields.trapnr == EXC_UD, EXC_UD);    \
> -        domain_crash(current->domain);                                  \
> -        goto cannot_emulate;                                            \
> +        rc = exception_from_stub(res_, stub.func, __LINE__, ctxt, ops); \
> +        goto done;                                                      \
>      }                                                                   \
>  } while (0)
>  #else
> @@ -1097,23 +1118,8 @@ do {
>      ops->write_segment(x86_seg_cs, cs, ctxt);                           \
>  })
> 
> -struct fpu_insn_ctxt {
> -    uint8_t insn_bytes;
> -    uint8_t type;
> -    int8_t exn_raised;
> -};
> -
> -static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
> -{
> -    struct fpu_insn_ctxt *fic = _fic;
> -    ASSERT(regs->entry_vector < 0x20);
> -    fic->exn_raised = regs->entry_vector;
> -    regs->r(ip) += fic->insn_bytes;
> -}
> -
>  static int _get_fpu(
>      enum x86_emulate_fpu_type type,
> -    struct fpu_insn_ctxt *fic,
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
> @@ -1138,14 +1144,13 @@ static int _get_fpu(
>          break;
>      }
> 
> -    rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
> +    rc = ops->get_fpu(type, ctxt);
> 
>      if ( rc == X86EMUL_OKAY )
>      {
>          unsigned long cr0;
> 
>          fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
> -        fic->type = type;
> 
>          fail_if(!ops->read_cr);
>          if ( type >= X86EMUL_FPU_xmm )
> @@ -1183,37 +1188,22 @@ static int _get_fpu(
>      return rc;
>  }
> 
> -#define get_fpu(_type, _fic)                                    \
> +#define get_fpu(type)                                           \
>  do {                                                            \
> -    rc = _get_fpu(_type, _fic, ctxt, ops);                      \
> +    rc = _get_fpu(fpu_type = (type), ctxt, ops);                \
>      if ( rc ) goto done;                                        \
>  } while (0)
> 
> -#define check_fpu_exn(fic)                                      \
> -do {                                                            \
> -    generate_exception_if((fic)->exn_raised >= 0,               \
> -                          (fic)->exn_raised);                   \
> -} while (0)
> -
> -#define check_xmm_exn(fic)                                      \
> -do {                                                            \
> -    if ( (fic)->exn_raised == EXC_XM && ops->read_cr &&         \
> -         ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
> -         !(cr4 & X86_CR4_OSXMMEXCPT) )                          \
> -        (fic)->exn_raised = EXC_UD;                             \
> -    check_fpu_exn(fic);                                         \
> -} while (0)
> -
>  static void put_fpu(
> -    struct fpu_insn_ctxt *fic,
> +    enum x86_emulate_fpu_type type,
>      bool failed_late,
>      const struct x86_emulate_state *state,
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
> -    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
> +    if ( unlikely(failed_late) && type == X86EMUL_FPU_fpu )
>          ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
> -    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
> +    else if ( unlikely(type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
>      {
>          struct x86_emul_fpu_aux aux = {
>              .ip = ctxt->regs->r(ip),
> @@ -1247,9 +1237,8 @@ static void put_fpu(
>          }
>          ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
>      }
> -    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
> +    else if ( type != X86EMUL_FPU_none && ops->put_fpu )
>          ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
> -    fic->type = X86EMUL_FPU_none;
>  }
> 
>  static inline bool fpu_check_write(void)
> @@ -1264,29 +1253,27 @@ static inline bool fpu_check_write(void)
>  #define emulate_fpu_insn_memdst(opc, ext, arg)                          \
>  do {                                                                    \
>      /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
> -    fic.insn_bytes = 2;                                                 \
> +    insn_bytes = 2;                                                     \
>      memcpy(get_stub(stub),                                              \
>             ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
> -    invoke_stub("", "", "+m" (fic), "+m" (arg) : "a" (&(arg)));         \
> +    invoke_stub("", "", "+m" (arg) : "a" (&(arg)));                     \
>      put_stub(stub);                                                     \
>  } while (0)
> 
>  #define emulate_fpu_insn_memsrc(opc, ext, arg)                          \
>  do {                                                                    \
>      /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
> -    fic.insn_bytes = 2;                                                 \
>      memcpy(get_stub(stub),                                              \
>             ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
> -    invoke_stub("", "", "+m" (fic) : "m" (arg), "a" (&(arg)));          \
> +    invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg)));        \
>      put_stub(stub);                                                     \
>  } while (0)
> 
>  #define emulate_fpu_insn_stub(bytes...)                                 \
>  do {                                                                    \
>      unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
> -    fic.insn_bytes = nr_;                                               \
>      memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
> -    invoke_stub("", "", "=m" (fic) : "m" (fic));                        \
> +    invoke_stub("", "", "=m" (dummy) : "i" (0));                        \
>      put_stub(stub);                                                     \
>  } while (0)
> 
> @@ -1294,12 +1281,10 @@ do {
>  do {                                                                    \
>      unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
>      unsigned long tmp_;                                                 \
> -    fic.insn_bytes = nr_;                                               \
>      memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
>      invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
>                  _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
> -                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_),       \
> -                "+m" (fic)                                              \
> +                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_)        \
>                  : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
>      put_stub(stub);                                                     \
>  } while (0)
> @@ -3142,14 +3127,14 @@ x86_emulate(
>      struct x86_emulate_state state;
>      int rc;
>      uint8_t b, d, *opc = NULL;
> -    unsigned int first_byte = 0;
> +    unsigned int first_byte = 0, insn_bytes = 0;
>      bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
>  	    !is_branch_step(ctxt, ops);
>      bool sfence = false;
>      struct operand src = { .reg = PTR_POISON };
>      struct operand dst = { .reg = PTR_POISON };
>      unsigned long cr4;
> -    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
> +    enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none;
>      struct x86_emulate_stub stub = {};
>      DECLARE_ALIGNED(mmval_t, mmval);
> 
> @@ -3839,9 +3824,8 @@ x86_emulate(
> 
>      case 0x9b:  /* wait/fwait */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_wait, &fic);
> +        get_fpu(X86EMUL_FPU_wait);
>          emulate_fpu_insn_stub(b);
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0x9c: /* pushf */
> @@ -4245,7 +4229,7 @@ x86_emulate(
> 
>      case 0xd8: /* FPU 0xd8 */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* fadd %stN,%st */
> @@ -4267,12 +4251,11 @@ x86_emulate(
>              emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
>              break;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xd9: /* FPU 0xd9 */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xfb: /* fsincos */
> @@ -4354,12 +4337,11 @@ x86_emulate(
>              if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
>                  dst.type = OP_NONE;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xda: /* FPU 0xda */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* fcmovb %stN */
> @@ -4376,12 +4358,11 @@ x86_emulate(
>              generate_exception_if(ea.type != OP_MEM, EXC_UD);
>              goto fpu_memsrc32;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xdb: /* FPU 0xdb */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* fcmovnb %stN */
> @@ -4434,12 +4415,11 @@ x86_emulate(
>                  generate_exception(EXC_UD);
>              }
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xdc: /* FPU 0xdc */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* fadd %st,%stN */
> @@ -4461,12 +4441,11 @@ x86_emulate(
>              emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
>              break;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xdd: /* FPU 0xdd */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* ffree %stN */
> @@ -4510,12 +4489,11 @@ x86_emulate(
>              if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
>                  dst.type = OP_NONE;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xde: /* FPU 0xde */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* faddp %stN */
> @@ -4533,12 +4511,11 @@ x86_emulate(
>              emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
>              break;
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xdf: /* FPU 0xdf */
>          host_and_vcpu_must_have(fpu);
> -        get_fpu(X86EMUL_FPU_fpu, &fic);
> +        get_fpu(X86EMUL_FPU_fpu);
>          switch ( modrm )
>          {
>          case 0xe0:
> @@ -4583,7 +4560,6 @@ x86_emulate(
>                  goto fpu_memdst64;
>              }
>          }
> -        check_fpu_exn(&fic);
>          break;
> 
>      case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
> @@ -5415,7 +5391,7 @@ x86_emulate(
>          else
>              generate_exception(EXC_UD);
> 
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> +        get_fpu(X86EMUL_FPU_mmx);
> 
>          d = DstReg | SrcMem;
>          op_bytes = 8;
> @@ -5505,7 +5481,7 @@ x86_emulate(
>              else
>                  vcpu_must_have(sse);
>      simd_0f_xmm:
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
> @@ -5515,7 +5491,7 @@ x86_emulate(
>      simd_0f_avx:
>              host_and_vcpu_must_have(avx);
>      simd_0f_ymm:
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
>      simd_0f_common:
>          opc = init_prefixes(stub);
> @@ -5528,7 +5504,7 @@ x86_emulate(
>              vex.b = 1;
>              opc[1] &= 0x38;
>          }
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>          break;
> 
>      case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
> @@ -5615,12 +5591,12 @@ x86_emulate(
>                  vcpu_must_have(sse2);
>              else
>                  vcpu_must_have(sse);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          if ( ea.type == OP_MEM )
> @@ -5646,14 +5622,14 @@ x86_emulate(
>                  vcpu_must_have(sse2);
>              else
>                  vcpu_must_have(sse);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              generate_exception_if(vex.reg != 0xf, EXC_UD);
>              vex.l = 0;
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          opc = init_prefixes(stub);
> @@ -5676,17 +5652,14 @@ x86_emulate(
>              opc[1] = modrm & 0xc7;
>          if ( !mode_64bit() )
>              vex.w = 0;
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>          opc[2] = 0xc3;
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          ea.reg = decode_register(modrm_reg, &_regs, 0);
> -        invoke_stub("", "", "=a" (*ea.reg), "+m" (fic.exn_raised)
> -                            : "c" (mmvalp), "m" (*mmvalp));
> +        invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> -
>          state->simd_size = simd_none;
>          break;
> 
> @@ -5700,13 +5673,13 @@ x86_emulate(
>                  vcpu_must_have(sse2);
>              else
>                  vcpu_must_have(sse);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              generate_exception_if(vex.reg != 0xf, EXC_UD);
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          opc = init_prefixes(stub);
> @@ -5724,20 +5697,17 @@ x86_emulate(
>              vex.b = 1;
>              opc[1] &= 0x38;
>          }
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>          opc[2] = 0xc3;
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
>                      _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
>                      [eflags] "+g" (_regs.eflags),
> -                    [tmp] "=&r" (dummy), "+m" (*mmvalp),
> -                    "+m" (fic.exn_raised)
> +                    [tmp] "=&r" (dummy), "+m" (*mmvalp)
>                      : "a" (mmvalp), [mask] "i" (EFLAGS_MASK));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> -
>          ASSERT(!state->simd_size);
>          break;
> 
> @@ -5875,9 +5845,9 @@ x86_emulate(
>          if ( !mode_64bit() )
>              vex.w = 0;
>          opc[1] = modrm & 0xc7;
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>      simd_0f_to_gpr:
> -        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
> +        opc[insn_bytes - PFX_BYTES] = 0xc3;
> 
>          generate_exception_if(ea.type != OP_REG, EXC_UD);
> 
> @@ -5896,9 +5866,9 @@ x86_emulate(
>                      vcpu_must_have(sse);
>              }
>              if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
> -                get_fpu(X86EMUL_FPU_xmm, &fic);
> +                get_fpu(X86EMUL_FPU_xmm);
>              else
> -                get_fpu(X86EMUL_FPU_mmx, &fic);
> +                get_fpu(X86EMUL_FPU_mmx);
>          }
>          else
>          {
> @@ -5907,14 +5877,13 @@ x86_emulate(
>                  host_and_vcpu_must_have(avx);
>              else
>                  host_and_vcpu_must_have(avx2);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> 
>          ASSERT(!state->simd_size);
>          dst.bytes = 4;
> @@ -6080,7 +6049,7 @@ x86_emulate(
>              goto simd_0f_sse2;
>      simd_0f_mmx:
>          host_and_vcpu_must_have(mmx);
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> +        get_fpu(X86EMUL_FPU_mmx);
>          goto simd_0f_common;
> 
>      CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
> @@ -6091,17 +6060,17 @@ x86_emulate(
>          {
>              generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
>          else if ( vex.pfx )
>          {
>              vcpu_must_have(sse2);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
> 
>      simd_0f_rm:
> @@ -6113,17 +6082,14 @@ x86_emulate(
>          if ( !mode_64bit() )
>              vex.w = 0;
>          opc[1] = modrm & 0x38;
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>          opc[2] = 0xc3;
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
> -        invoke_stub("", "", "+m" (src.val), "+m" (fic.exn_raised)
> -                            : "a" (&src.val));
> +        invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
>          dst.val = src.val;
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> -
>          ASSERT(!state->simd_size);
>          break;
> 
> @@ -6189,19 +6155,19 @@ x86_emulate(
>                  host_and_vcpu_must_have(avx);
>              }
>      simd_0f_imm8_ymm:
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
>          else if ( vex.pfx )
>          {
>      simd_0f_imm8_sse2:
>              vcpu_must_have(sse2);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
>              vcpu_must_have(mmxext);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
>      simd_0f_imm8:
>          opc = init_prefixes(stub);
> @@ -6215,7 +6181,7 @@ x86_emulate(
>              opc[1] &= 0x38;
>          }
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>          break;
> 
>      CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
> @@ -6243,33 +6209,31 @@ x86_emulate(
>                  host_and_vcpu_must_have(avx2);
>              else
>                  host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
>          else if ( vex.pfx )
>          {
>              vcpu_must_have(sse2);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
> 
>          opc = init_prefixes(stub);
>          opc[0] = b;
>          opc[1] = modrm;
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>      simd_0f_reg_only:
> -        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
> +        opc[insn_bytes - PFX_BYTES] = 0xc3;
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> -
>          ASSERT(!state->simd_size);
>          break;
> 
> @@ -6304,7 +6268,7 @@ x86_emulate(
>          {
>              generate_exception_if(vex.reg != 0xf, EXC_UD);
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
> 
>  #ifdef __x86_64__
>              if ( !mode_64bit() )
> @@ -6346,12 +6310,12 @@ x86_emulate(
>          else
>          {
>              host_and_vcpu_must_have(mmx);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
> 
>          opc = init_prefixes(stub);
>          opc[0] = b;
> -        fic.insn_bytes = PFX_BYTES + 1;
> +        insn_bytes = PFX_BYTES + 1;
>          goto simd_0f_reg_only;
> 
>      case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
> @@ -6367,14 +6331,14 @@ x86_emulate(
>          generate_exception_if(ea.type != OP_REG, EXC_UD);
> 
>          host_and_vcpu_must_have(sse4a);
> -        get_fpu(X86EMUL_FPU_xmm, &fic);
> +        get_fpu(X86EMUL_FPU_xmm);
> 
>          opc = init_prefixes(stub);
>          opc[0] = b;
>          opc[1] = modrm;
>          opc[2] = imm1;
>          opc[3] = imm2;
> -        fic.insn_bytes = PFX_BYTES + 4;
> +        insn_bytes = PFX_BYTES + 4;
>          goto simd_0f_reg_only;
> 
>      case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
> @@ -6502,7 +6466,7 @@ x86_emulate(
>              vcpu_must_have(sse);
>          ldmxcsr:
>              generate_exception_if(src.type != OP_MEM, EXC_UD);
> -            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
> +            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
>              generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
>              asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
>              break;
> @@ -6512,7 +6476,7 @@ x86_emulate(
>              vcpu_must_have(sse);
>          stmxcsr:
>              generate_exception_if(dst.type != OP_MEM, EXC_UD);
> -            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
> +            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
>              asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
>              break;
> 
> @@ -6766,7 +6730,7 @@ x86_emulate(
>              if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
>                  goto simd_0f_imm8_sse2;
>              vcpu_must_have(sse);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>              goto simd_0f_imm8;
>          }
>          goto simd_0f_imm8_avx;
> @@ -6797,7 +6761,7 @@ x86_emulate(
>              vex.w = 0;
>          opc[1] = modrm & 0xc7;
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>          goto simd_0f_to_gpr;
> 
>      case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
> @@ -7043,18 +7007,18 @@ x86_emulate(
>              generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
>              d |= TwoOp;
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
>          else if ( vex.pfx )
>          {
>              vcpu_must_have(sse2);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
>              vcpu_must_have(mmxext);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
> 
>          /*
> @@ -7074,7 +7038,6 @@ x86_emulate(
>          if ( !mode_64bit() )
>              vex.w = 0;
>          opc[1] = modrm & 0xc7;
> -        fic.insn_bytes = PFX_BYTES + 2;
>          opc[2] = 0xc3;
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
> @@ -7087,6 +7050,7 @@ x86_emulate(
>          opc = init_prefixes(stub);
>          opc[0] = b;
>          opc[1] = modrm;
> +        insn_bytes = PFX_BYTES + 2;
>          /* Restore high bit of XMM destination. */
>          if ( sfence )
>          {
> @@ -7133,12 +7097,12 @@ x86_emulate(
>          if ( vex.pfx )
>          {
>      simd_0f38_common:
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
>          opc = init_prefixes(stub);
>          opc[0] = 0x38;
> @@ -7151,7 +7115,7 @@ x86_emulate(
>              vex.b = 1;
>              opc[2] &= 0x38;
>          }
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>          break;
> 
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
> @@ -7175,13 +7139,13 @@ x86_emulate(
>          if ( vex.opcx == vex_none )
>          {
>              host_and_vcpu_must_have(sse4_1);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              generate_exception_if(vex.reg != 0xf, EXC_UD);
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          opc = init_prefixes(stub);
> @@ -7200,21 +7164,19 @@ x86_emulate(
>              vex.b = 1;
>              opc[1] &= 0x38;
>          }
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
>          opc[2] = 0xc3;
>          if ( vex.opcx == vex_none )
>          {
>              /* Cover for extra prefix byte. */
>              --opc;
> -            ++fic.insn_bytes;
> +            ++insn_bytes;
>          }
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          emulate_stub("+m" (*mmvalp), "a" (mmvalp));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> -
>          state->simd_size = simd_none;
>          dst.type = OP_NONE;
>          break;
> @@ -7303,7 +7265,7 @@ x86_emulate(
> 
>          generate_exception_if(ea.type != OP_MEM || vex.w, EXC_UD);
>          host_and_vcpu_must_have(avx);
> -        get_fpu(X86EMUL_FPU_ymm, &fic);
> +        get_fpu(X86EMUL_FPU_ymm);
> 
>          /*
>           * While we can't reasonably provide fully correct behavior here
> @@ -7352,7 +7314,7 @@ x86_emulate(
>          rex_prefix &= ~REX_B;
>          vex.b = 1;
>          opc[1] = modrm & 0x38;
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
> 
>          break;
>      }
> @@ -7401,7 +7363,7 @@ x86_emulate(
> 
>          generate_exception_if(ea.type != OP_MEM, EXC_UD);
>          host_and_vcpu_must_have(avx2);
> -        get_fpu(X86EMUL_FPU_ymm, &fic);
> +        get_fpu(X86EMUL_FPU_ymm);
> 
>          /*
>           * While we can't reasonably provide fully correct behavior here
> @@ -7448,7 +7410,7 @@ x86_emulate(
>          rex_prefix &= ~REX_B;
>          vex.b = 1;
>          opc[1] = modrm & 0x38;
> -        fic.insn_bytes = PFX_BYTES + 2;
> +        insn_bytes = PFX_BYTES + 2;
> 
>          break;
>      }
> @@ -7471,7 +7433,7 @@ x86_emulate(
>                                state->sib_index == mask_reg, EXC_UD);
>          generate_exception_if(!cpu_has_avx, EXC_UD);
>          vcpu_must_have(avx2);
> -        get_fpu(X86EMUL_FPU_ymm, &fic);
> +        get_fpu(X86EMUL_FPU_ymm);
> 
>          /* Read destination, index, and mask registers. */
>          opc = init_prefixes(stub);
> @@ -7808,12 +7770,12 @@ x86_emulate(
>          if ( vex.pfx )
>          {
>      simd_0f3a_common:
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              host_and_vcpu_must_have(mmx);
> -            get_fpu(X86EMUL_FPU_mmx, &fic);
> +            get_fpu(X86EMUL_FPU_mmx);
>          }
>          opc = init_prefixes(stub);
>          opc[0] = 0x3a;
> @@ -7827,7 +7789,7 @@ x86_emulate(
>              opc[2] &= 0x38;
>          }
>          opc[3] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 4;
> +        insn_bytes = PFX_BYTES + 4;
>          break;
> 
>      case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
> @@ -7835,7 +7797,7 @@ x86_emulate(
>      case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
>      case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
>          host_and_vcpu_must_have(sse4_1);
> -        get_fpu(X86EMUL_FPU_xmm, &fic);
> +        get_fpu(X86EMUL_FPU_xmm);
> 
>          opc = init_prefixes(stub);
>          opc++[0] = 0x3a;
> @@ -7848,20 +7810,16 @@ x86_emulate(
>              vex.w = 0;
>          opc[1] = modrm & 0x38;
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
>          opc[3] = 0xc3;
>          if ( vex.opcx == vex_none )
>          {
>              /* Cover for extra prefix byte. */
>              --opc;
> -            ++fic.insn_bytes;
>          }
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
>          invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
> -
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> 
>          ASSERT(!state->simd_size);
>          dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
> @@ -7875,7 +7833,7 @@ x86_emulate(
>      case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
>          generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
>          host_and_vcpu_must_have(avx);
> -        get_fpu(X86EMUL_FPU_ymm, &fic);
> +        get_fpu(X86EMUL_FPU_ymm);
>          opc = init_prefixes(stub);
>          goto pextr;
> 
> @@ -7897,17 +7855,15 @@ x86_emulate(
>              opc[1] &= 0x38;
>          }
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>          opc[3] = 0xc3;
> 
>          copy_VEX(opc, vex);
>          /* Latch MXCSR - we may need to restore it below. */
>          invoke_stub("stmxcsr %[mxcsr]", "",
> -                    "=m" (*mmvalp), "+m" (fic.exn_raised), [mxcsr] "=m" (mxcsr)
> -                    : "a" (mmvalp));
> +                    "=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
> 
>          if ( ea.type == OP_MEM )
>          {
> @@ -7926,7 +7882,7 @@ x86_emulate(
>      case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
>      case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
>          host_and_vcpu_must_have(sse4_1);
> -        get_fpu(X86EMUL_FPU_xmm, &fic);
> +        get_fpu(X86EMUL_FPU_xmm);
>          memcpy(mmvalp, &src.val, op_bytes);
>          ea.type = OP_MEM;
>          op_bytes = src.bytes;
> @@ -8036,13 +7992,13 @@ x86_emulate(
>          if ( vex.opcx == vex_none )
>          {
>              host_and_vcpu_must_have(sse4_2);
> -            get_fpu(X86EMUL_FPU_xmm, &fic);
> +            get_fpu(X86EMUL_FPU_xmm);
>          }
>          else
>          {
>              generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
>              host_and_vcpu_must_have(avx);
> -            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            get_fpu(X86EMUL_FPU_ymm);
>          }
> 
>          opc = init_prefixes(stub);
> @@ -8063,13 +8019,13 @@ x86_emulate(
>                  goto done;
>          }
>          opc[2] = imm1;
> -        fic.insn_bytes = PFX_BYTES + 3;
> +        insn_bytes = PFX_BYTES + 3;
>          opc[3] = 0xc3;
>          if ( vex.opcx == vex_none )
>          {
>              /* Cover for extra prefix byte. */
>              --opc;
> -            ++fic.insn_bytes;
> +            ++insn_bytes;
>          }
> 
>          copy_REX_VEX(opc, rex_prefix, vex);
> @@ -8297,7 +8253,7 @@ x86_emulate(
> 
>          if ( !opc )
>              BUG();
> -        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
> +        opc[insn_bytes - PFX_BYTES] = 0xc3;
>          copy_REX_VEX(opc, rex_prefix, vex);
> 
>          if ( ea.type == OP_MEM )
> @@ -8374,13 +8330,11 @@ x86_emulate(
>          if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
>                                        X86EMUL_OPC_ENCODING_MASK)) !=
>                      X86EMUL_OPC(0x0f, 0xf7)) )
> -            invoke_stub("", "", "+m" (*mmvalp), "+m" (fic.exn_raised)
> -                                : "a" (mmvalp));
> +            invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
>          else
>              invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
> 
>          put_stub(stub);
> -        check_xmm_exn(&fic);
>      }
> 
>      switch ( dst.type )
> @@ -8423,7 +8377,8 @@ x86_emulate(
>      }
> 
>   complete_insn: /* Commit shadow register state. */
> -    put_fpu(&fic, false, state, ctxt, ops);
> +    put_fpu(fpu_type, false, state, ctxt, ops);
> +    fpu_type = X86EMUL_FPU_none;
> 
>      /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
>      if ( !mode_64bit() )
> @@ -8447,7 +8402,7 @@ x86_emulate(
>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
> 
>   done:
> -    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
> +    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
>      put_stub(stub);
>      return rc;
>  #undef state
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -421,12 +421,8 @@ struct x86_emulate_ops
> 
>      /*
>       * get_fpu: Load emulated environment's FPU state onto processor.
> -     *  @exn_callback: On any FPU or SIMD exception, pass control to
> -     *                 (*exception_callback)(exception_callback_arg, regs).
>       */
>      int (*get_fpu)(
> -        void (*exception_callback)(void *, struct cpu_user_regs *),
> -        void *exception_callback_arg,
>          enum x86_emulate_fpu_type type,
>          struct x86_emulate_ctxt *ctxt);
> 
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -196,10 +196,6 @@ struct hvm_vcpu {
> 
>      struct hvm_vcpu_io  hvm_io;
> 
> -    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
> -    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
> -    void *fpu_exception_callback_arg;
> -
>      /* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
>      struct x86_event     inject_event;
> 
>
Jan Beulich Sept. 18, 2017, 9:35 a.m. UTC | #2
>>> On 18.09.17 at 11:27, <Paul.Durrant@citrix.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 14 September 2017 16:22
>> To: xen-devel <xen-devel@lists.xenproject.org>
>> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant
>> <Paul.Durrant@citrix.com>; George Dunlap <George.Dunlap@citrix.com>
>> Subject: [PATCH v2 17/17] x86/HVM: eliminate custom #MF/#XM handling
>> 
>> Use the generic stub exception handling instead.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> The changes look ok to me, but I don't really have any context as to why 
> separate exception handling might have been desirable in the first place.

Having all exceptions dealt with in a central place and in similar
ways seems pretty desirable to me, and the diffstat of the patch
also looks to be a reason to do the conversion.

> Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

Thanks.

Jan
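
For readers wanting the context Paul asked about: before this patch, a #MF or #XM raised inside an emulation stub was intercepted in do_trap() and bounced to the per-vCPU fpu_exception_callback, which stashed the vector in struct fpu_insn_ctxt for check_fpu_exn()/check_xmm_exn() to turn into an emulator exception later. After the patch, the generic stub fixup latches the fault into a stub_exception_token, and the new exception_from_stub() helper converts it directly. The sketch below is a minimal, standalone model of that helper's decision logic, not the Xen code itself; the token layout and vector constants are simplified for illustration.

/* Minimal model of the centralized #MF/#XM handling added by this patch. */
#include <stdint.h>
#include <stdio.h>

#define EXC_UD  6                    /* invalid opcode */
#define EXC_MF 16                    /* x87 floating-point error */
#define EXC_XM 19                    /* SIMD floating-point exception */

#define X86_CR4_OSXMMEXCPT (1u << 10)

/* Simplified stand-in for Xen's union stub_exception_token. */
union stub_exception_token {
    struct { uint16_t ec; uint8_t trapnr; } fields;
    uint32_t raw;
};

/* Which vector the emulator forwards for a fault taken in a stub. */
static int exception_to_inject(union stub_exception_token res,
                               unsigned long cr4)
{
    switch ( res.fields.trapnr )
    {
    case EXC_MF:                     /* x87 fault: reflect #MF as is */
        return EXC_MF;
    case EXC_XM:                     /* SIMD fault: #XM only if enabled */
        return (cr4 & X86_CR4_OSXMMEXCPT) ? EXC_XM : EXC_UD;
    case EXC_UD:
        return EXC_UD;
    default:                         /* anything else: domain_crash() path */
        return -1;
    }
}

int main(void)
{
    union stub_exception_token t = { .fields = { .ec = 0, .trapnr = EXC_XM } };

    printf("#XM, OSXMMEXCPT clear -> vector %d\n", exception_to_inject(t, 0));
    printf("#XM, OSXMMEXCPT set   -> vector %d\n",
           exception_to_inject(t, X86_CR4_OSXMMEXCPT));
    return 0;
}

The demotion of #XM to #UD when CR4.OSXMMEXCPT is clear is the same decision the removed check_xmm_exn() macro used to make; centralizing it in exception_from_stub() is what allows the fpu_exception_callback machinery and struct fpu_insn_ctxt to go away.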

Patch

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -134,8 +134,6 @@  int emul_test_read_xcr(
 }
 
 int emul_test_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt)
 {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -221,8 +221,6 @@  int emul_test_read_xcr(
     struct x86_emulate_ctxt *ctxt);
 
 int emul_test_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt);
 
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1739,8 +1739,6 @@  int hvmemul_cpuid(uint32_t leaf, uint32_
 }
 
 static int hvmemul_get_fpu(
-    void (*exception_callback)(void *, struct cpu_user_regs *),
-    void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt)
 {
@@ -1778,9 +1776,6 @@  static int hvmemul_get_fpu(
         }
     }
 
-    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
-    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
-
     return X86EMUL_OKAY;
 }
 
@@ -1791,8 +1786,6 @@  static void hvmemul_put_fpu(
 {
     struct vcpu *curr = current;
 
-    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
-
     if ( aux )
     {
         typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -703,7 +703,6 @@  void do_reserved_trap(struct cpu_user_re
 
 void do_trap(struct cpu_user_regs *regs)
 {
-    struct vcpu *curr = current;
     unsigned int trapnr = regs->entry_vector;
     unsigned long fixup;
 
@@ -723,15 +722,6 @@  void do_trap(struct cpu_user_regs *regs)
         return;
     }
 
-    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
-         system_state >= SYS_STATE_active && is_hvm_vcpu(curr) &&
-         curr->arch.hvm_vcpu.fpu_exception_callback )
-    {
-        curr->arch.hvm_vcpu.fpu_exception_callback(
-            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
-        return;
-    }
-
     if ( likely((fixup = search_exception_table(regs)) != 0) )
     {
         dprintk(XENLOG_ERR, "Trap %u: %p [%ps] -> %p\n",
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -959,6 +959,33 @@  static inline int mkec(uint8_t e, int32_
 #define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
 
 #ifdef __XEN__
+static int exception_from_stub(union stub_exception_token res,
+                               void *stub, unsigned int line,
+                               struct x86_emulate_ctxt *ctxt,
+                               const struct x86_emulate_ops *ops)
+{
+    int rc = X86EMUL_UNHANDLEABLE;
+
+    generate_exception_if(res.fields.trapnr == EXC_MF, EXC_MF);
+    if ( res.fields.trapnr == EXC_XM )
+    {
+        unsigned long cr4;
+
+        if ( !ops->read_cr || !ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY )
+            cr4 = X86_CR4_OSXMMEXCPT;
+        generate_exception(cr4 & X86_CR4_OSXMMEXCPT ? EXC_XM : EXC_UD);
+    }
+    gprintk(XENLOG_WARNING,
+            "exception %u (ec=%04x) in emulation stub (line %u)\n",
+            res.fields.trapnr, res.fields.ec, line);
+    gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  stub);
+    generate_exception_if(res.fields.trapnr == EXC_UD, EXC_UD);
+    domain_crash(current->domain);
+
+ done:
+    return rc;
+}
+
 # define invoke_stub(pre, post, constraints...) do {                    \
     union stub_exception_token res_ = { .raw = ~0 };                    \
     asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"                \
@@ -974,14 +1001,8 @@  static inline int mkec(uint8_t e, int32_
                      "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
     if ( unlikely(~res_.raw) )                                          \
     {                                                                   \
-        gprintk(XENLOG_WARNING,                                         \
-                "exception %u (ec=%04x) in emulation stub (line %u)\n", \
-                res_.fields.trapnr, res_.fields.ec, __LINE__);          \
-        gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  \
-                stub.func);                                             \
-        generate_exception_if(res_.fields.trapnr == EXC_UD, EXC_UD);    \
-        domain_crash(current->domain);                                  \
-        goto cannot_emulate;                                            \
+        rc = exception_from_stub(res_, stub.func, __LINE__, ctxt, ops); \
+        goto done;                                                      \
     }                                                                   \
 } while (0)
 #else
@@ -1097,23 +1118,8 @@  do {
     ops->write_segment(x86_seg_cs, cs, ctxt);                           \
 })
 
-struct fpu_insn_ctxt {
-    uint8_t insn_bytes;
-    uint8_t type;
-    int8_t exn_raised;
-};
-
-static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
-{
-    struct fpu_insn_ctxt *fic = _fic;
-    ASSERT(regs->entry_vector < 0x20);
-    fic->exn_raised = regs->entry_vector;
-    regs->r(ip) += fic->insn_bytes;
-}
-
 static int _get_fpu(
     enum x86_emulate_fpu_type type,
-    struct fpu_insn_ctxt *fic,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
@@ -1138,14 +1144,13 @@  static int _get_fpu(
         break;
     }
 
-    rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
+    rc = ops->get_fpu(type, ctxt);
 
     if ( rc == X86EMUL_OKAY )
     {
         unsigned long cr0;
 
         fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
-        fic->type = type;
 
         fail_if(!ops->read_cr);
         if ( type >= X86EMUL_FPU_xmm )
@@ -1183,37 +1188,22 @@  static int _get_fpu(
     return rc;
 }
 
-#define get_fpu(_type, _fic)                                    \
+#define get_fpu(type)                                           \
 do {                                                            \
-    rc = _get_fpu(_type, _fic, ctxt, ops);                      \
+    rc = _get_fpu(fpu_type = (type), ctxt, ops);                \
     if ( rc ) goto done;                                        \
 } while (0)
 
-#define check_fpu_exn(fic)                                      \
-do {                                                            \
-    generate_exception_if((fic)->exn_raised >= 0,               \
-                          (fic)->exn_raised);                   \
-} while (0)
-
-#define check_xmm_exn(fic)                                      \
-do {                                                            \
-    if ( (fic)->exn_raised == EXC_XM && ops->read_cr &&         \
-         ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
-         !(cr4 & X86_CR4_OSXMMEXCPT) )                          \
-        (fic)->exn_raised = EXC_UD;                             \
-    check_fpu_exn(fic);                                         \
-} while (0)
-
 static void put_fpu(
-    struct fpu_insn_ctxt *fic,
+    enum x86_emulate_fpu_type type,
     bool failed_late,
     const struct x86_emulate_state *state,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
-    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
+    if ( unlikely(failed_late) && type == X86EMUL_FPU_fpu )
         ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
-    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
+    else if ( unlikely(type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
     {
         struct x86_emul_fpu_aux aux = {
             .ip = ctxt->regs->r(ip),
@@ -1247,9 +1237,8 @@  static void put_fpu(
         }
         ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
     }
-    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
+    else if ( type != X86EMUL_FPU_none && ops->put_fpu )
         ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
-    fic->type = X86EMUL_FPU_none;
 }
 
 static inline bool fpu_check_write(void)
@@ -1264,29 +1253,27 @@  static inline bool fpu_check_write(void)
 #define emulate_fpu_insn_memdst(opc, ext, arg)                          \
 do {                                                                    \
     /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    fic.insn_bytes = 2;                                                 \
+    insn_bytes = 2;                                                     \
     memcpy(get_stub(stub),                                              \
            ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "+m" (fic), "+m" (arg) : "a" (&(arg)));         \
+    invoke_stub("", "", "+m" (arg) : "a" (&(arg)));                     \
     put_stub(stub);                                                     \
 } while (0)
 
 #define emulate_fpu_insn_memsrc(opc, ext, arg)                          \
 do {                                                                    \
     /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */            \
-    fic.insn_bytes = 2;                                                 \
     memcpy(get_stub(stub),                                              \
            ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3);            \
-    invoke_stub("", "", "+m" (fic) : "m" (arg), "a" (&(arg)));          \
+    invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg)));        \
     put_stub(stub);                                                     \
 } while (0)
 
 #define emulate_fpu_insn_stub(bytes...)                                 \
 do {                                                                    \
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
-    fic.insn_bytes = nr_;                                               \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
-    invoke_stub("", "", "=m" (fic) : "m" (fic));                        \
+    invoke_stub("", "", "=m" (dummy) : "i" (0));                        \
     put_stub(stub);                                                     \
 } while (0)
 
@@ -1294,12 +1281,10 @@  do {
 do {                                                                    \
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
     unsigned long tmp_;                                                 \
-    fic.insn_bytes = nr_;                                               \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
     invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
                 _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
-                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_),       \
-                "+m" (fic)                                              \
+                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_)        \
                 : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
     put_stub(stub);                                                     \
 } while (0)
@@ -3142,14 +3127,14 @@  x86_emulate(
     struct x86_emulate_state state;
     int rc;
     uint8_t b, d, *opc = NULL;
-    unsigned int first_byte = 0;
+    unsigned int first_byte = 0, insn_bytes = 0;
     bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
 	    !is_branch_step(ctxt, ops);
     bool sfence = false;
     struct operand src = { .reg = PTR_POISON };
     struct operand dst = { .reg = PTR_POISON };
     unsigned long cr4;
-    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
+    enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
 
@@ -3839,9 +3824,8 @@  x86_emulate(
 
     case 0x9b:  /* wait/fwait */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_wait, &fic);
+        get_fpu(X86EMUL_FPU_wait);
         emulate_fpu_insn_stub(b);
-        check_fpu_exn(&fic);
         break;
 
     case 0x9c: /* pushf */
@@ -4245,7 +4229,7 @@  x86_emulate(
 
     case 0xd8: /* FPU 0xd8 */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %stN,%st */
@@ -4267,12 +4251,11 @@  x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xd9: /* FPU 0xd9 */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xfb: /* fsincos */
@@ -4354,12 +4337,11 @@  x86_emulate(
             if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xda: /* FPU 0xda */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovb %stN */
@@ -4376,12 +4358,11 @@  x86_emulate(
             generate_exception_if(ea.type != OP_MEM, EXC_UD);
             goto fpu_memsrc32;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdb: /* FPU 0xdb */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovnb %stN */
@@ -4434,12 +4415,11 @@  x86_emulate(
                 generate_exception(EXC_UD);
             }
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdc: /* FPU 0xdc */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %st,%stN */
@@ -4461,12 +4441,11 @@  x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdd: /* FPU 0xdd */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* ffree %stN */
@@ -4510,12 +4489,11 @@  x86_emulate(
             if ( dst.type == OP_MEM && !state->fpu_ctrl && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xde: /* FPU 0xde */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* faddp %stN */
@@ -4533,12 +4511,11 @@  x86_emulate(
             emulate_fpu_insn_memsrc(b, modrm_reg, src.val);
             break;
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xdf: /* FPU 0xdf */
         host_and_vcpu_must_have(fpu);
-        get_fpu(X86EMUL_FPU_fpu, &fic);
+        get_fpu(X86EMUL_FPU_fpu);
         switch ( modrm )
         {
         case 0xe0:
@@ -4583,7 +4560,6 @@  x86_emulate(
                 goto fpu_memdst64;
             }
         }
-        check_fpu_exn(&fic);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -5415,7 +5391,7 @@  x86_emulate(
         else
             generate_exception(EXC_UD);
 
-        get_fpu(X86EMUL_FPU_mmx, &fic);
+        get_fpu(X86EMUL_FPU_mmx);
 
         d = DstReg | SrcMem;
         op_bytes = 8;
@@ -5505,7 +5481,7 @@  x86_emulate(
             else
                 vcpu_must_have(sse);
     simd_0f_xmm:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
@@ -5515,7 +5491,7 @@  x86_emulate(
     simd_0f_avx:
             host_and_vcpu_must_have(avx);
     simd_0f_ymm:
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
     simd_0f_common:
         opc = init_prefixes(stub);
@@ -5528,7 +5504,7 @@  x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         break;
 
     case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
@@ -5615,12 +5591,12 @@  x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         if ( ea.type == OP_MEM )
@@ -5646,14 +5622,14 @@  x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             vex.l = 0;
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -5676,17 +5652,14 @@  x86_emulate(
             opc[1] = modrm & 0xc7;
         if ( !mode_64bit() )
             vex.w = 0;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         ea.reg = decode_register(modrm_reg, &_regs, 0);
-        invoke_stub("", "", "=a" (*ea.reg), "+m" (fic.exn_raised)
-                            : "c" (mmvalp), "m" (*mmvalp));
+        invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         state->simd_size = simd_none;
         break;
 
@@ -5700,13 +5673,13 @@  x86_emulate(
                 vcpu_must_have(sse2);
             else
                 vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -5724,20 +5697,17 @@  x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
                     _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
                     [eflags] "+g" (_regs.eflags),
-                    [tmp] "=&r" (dummy), "+m" (*mmvalp),
-                    "+m" (fic.exn_raised)
+                    [tmp] "=&r" (dummy), "+m" (*mmvalp)
                     : "a" (mmvalp), [mask] "i" (EFLAGS_MASK));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -5875,9 +5845,9 @@  x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0xc7;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
     simd_0f_to_gpr:
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
 
         generate_exception_if(ea.type != OP_REG, EXC_UD);
 
@@ -5896,9 +5866,9 @@  x86_emulate(
                     vcpu_must_have(sse);
             }
             if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
-                get_fpu(X86EMUL_FPU_xmm, &fic);
+                get_fpu(X86EMUL_FPU_xmm);
             else
-                get_fpu(X86EMUL_FPU_mmx, &fic);
+                get_fpu(X86EMUL_FPU_mmx);
         }
         else
         {
@@ -5907,14 +5877,13 @@  x86_emulate(
                 host_and_vcpu_must_have(avx);
             else
                 host_and_vcpu_must_have(avx2);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = 4;
@@ -6080,7 +6049,7 @@  x86_emulate(
             goto simd_0f_sse2;
     simd_0f_mmx:
         host_and_vcpu_must_have(mmx);
-        get_fpu(X86EMUL_FPU_mmx, &fic);
+        get_fpu(X86EMUL_FPU_mmx);
         goto simd_0f_common;
 
     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
@@ -6091,17 +6060,17 @@  x86_emulate(
         {
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
     simd_0f_rm:
@@ -6113,17 +6082,14 @@  x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
-        invoke_stub("", "", "+m" (src.val), "+m" (fic.exn_raised)
-                            : "a" (&src.val));
+        invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
         dst.val = src.val;
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -6189,19 +6155,19 @@  x86_emulate(
                 host_and_vcpu_must_have(avx);
             }
     simd_0f_imm8_ymm:
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
     simd_0f_imm8_sse2:
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
             vcpu_must_have(mmxext);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
     simd_0f_imm8:
         opc = init_prefixes(stub);
@@ -6215,7 +6181,7 @@  x86_emulate(
             opc[1] &= 0x38;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         break;
 
     CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
@@ -6243,33 +6209,31 @@  x86_emulate(
                 host_and_vcpu_must_have(avx2);
             else
                 host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
     simd_0f_reg_only:
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         ASSERT(!state->simd_size);
         break;
 
@@ -6304,7 +6268,7 @@  x86_emulate(
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
 
 #ifdef __x86_64__
             if ( !mode_64bit() )
@@ -6346,12 +6310,12 @@  x86_emulate(
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         opc = init_prefixes(stub);
         opc[0] = b;
-        fic.insn_bytes = PFX_BYTES + 1;
+        insn_bytes = PFX_BYTES + 1;
         goto simd_0f_reg_only;
 
     case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
@@ -6367,14 +6331,14 @@  x86_emulate(
         generate_exception_if(ea.type != OP_REG, EXC_UD);
 
         host_and_vcpu_must_have(sse4a);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
 
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
         opc[2] = imm1;
         opc[3] = imm2;
-        fic.insn_bytes = PFX_BYTES + 4;
+        insn_bytes = PFX_BYTES + 4;
         goto simd_0f_reg_only;
 
     case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
@@ -6502,7 +6466,7 @@  x86_emulate(
             vcpu_must_have(sse);
         ldmxcsr:
             generate_exception_if(src.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
+            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
             generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
             asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
             break;
@@ -6512,7 +6476,7 @@  x86_emulate(
             vcpu_must_have(sse);
         stmxcsr:
             generate_exception_if(dst.type != OP_MEM, EXC_UD);
-            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
+            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
             asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
             break;
 
@@ -6766,7 +6730,7 @@  x86_emulate(
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
                 goto simd_0f_imm8_sse2;
             vcpu_must_have(sse);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
             goto simd_0f_imm8;
         }
         goto simd_0f_imm8_avx;
@@ -6797,7 +6761,7 @@  x86_emulate(
             vex.w = 0;
         opc[1] = modrm & 0xc7;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         goto simd_0f_to_gpr;
 
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
@@ -7043,18 +7007,18 @@  x86_emulate(
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             d |= TwoOp;
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
         else if ( vex.pfx )
         {
             vcpu_must_have(sse2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
             vcpu_must_have(mmxext);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
 
         /*
@@ -7074,7 +7038,6 @@  x86_emulate(
         if ( !mode_64bit() )
             vex.w = 0;
         opc[1] = modrm & 0xc7;
-        fic.insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
 
         copy_REX_VEX(opc, rex_prefix, vex);
@@ -7087,6 +7050,7 @@  x86_emulate(
         opc = init_prefixes(stub);
         opc[0] = b;
         opc[1] = modrm;
+        insn_bytes = PFX_BYTES + 2;
         /* Restore high bit of XMM destination. */
         if ( sfence )
         {
@@ -7133,12 +7097,12 @@  x86_emulate(
         if ( vex.pfx )
         {
     simd_0f38_common:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
         opc = init_prefixes(stub);
         opc[0] = 0x38;
@@ -7151,7 +7115,7 @@  x86_emulate(
             vex.b = 1;
             opc[2] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         break;
 
     case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
@@ -7175,13 +7139,13 @@  x86_emulate(
         if ( vex.opcx == vex_none )
         {
             host_and_vcpu_must_have(sse4_1);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -7200,21 +7164,19 @@  x86_emulate(
             vex.b = 1;
             opc[1] &= 0x38;
         }
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
         opc[2] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
+            ++insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         emulate_stub("+m" (*mmvalp), "a" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
-
         state->simd_size = simd_none;
         dst.type = OP_NONE;
         break;
@@ -7303,7 +7265,7 @@  x86_emulate(
 
         generate_exception_if(ea.type != OP_MEM || vex.w, EXC_UD);
         host_and_vcpu_must_have(avx);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /*
          * While we can't reasonably provide fully correct behavior here
@@ -7352,7 +7314,7 @@  x86_emulate(
         rex_prefix &= ~REX_B;
         vex.b = 1;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
 
         break;
     }
@@ -7401,7 +7363,7 @@  x86_emulate(
 
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
         host_and_vcpu_must_have(avx2);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /*
          * While we can't reasonably provide fully correct behavior here
@@ -7448,7 +7410,7 @@  x86_emulate(
         rex_prefix &= ~REX_B;
         vex.b = 1;
         opc[1] = modrm & 0x38;
-        fic.insn_bytes = PFX_BYTES + 2;
+        insn_bytes = PFX_BYTES + 2;
 
         break;
     }
@@ -7471,7 +7433,7 @@  x86_emulate(
                               state->sib_index == mask_reg, EXC_UD);
         generate_exception_if(!cpu_has_avx, EXC_UD);
         vcpu_must_have(avx2);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
 
         /* Read destination, index, and mask registers. */
         opc = init_prefixes(stub);
@@ -7808,12 +7770,12 @@  x86_emulate(
         if ( vex.pfx )
         {
     simd_0f3a_common:
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             host_and_vcpu_must_have(mmx);
-            get_fpu(X86EMUL_FPU_mmx, &fic);
+            get_fpu(X86EMUL_FPU_mmx);
         }
         opc = init_prefixes(stub);
         opc[0] = 0x3a;
@@ -7827,7 +7789,7 @@  x86_emulate(
             opc[2] &= 0x38;
         }
         opc[3] = imm1;
-        fic.insn_bytes = PFX_BYTES + 4;
+        insn_bytes = PFX_BYTES + 4;
         break;
 
     case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
@@ -7835,7 +7797,7 @@  x86_emulate(
     case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
     case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
         host_and_vcpu_must_have(sse4_1);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
 
         opc = init_prefixes(stub);
         opc++[0] = 0x3a;
@@ -7848,20 +7810,16 @@  x86_emulate(
             vex.w = 0;
         opc[1] = modrm & 0x38;
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
-
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
@@ -7875,7 +7833,7 @@  x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
         host_and_vcpu_must_have(avx);
-        get_fpu(X86EMUL_FPU_ymm, &fic);
+        get_fpu(X86EMUL_FPU_ymm);
         opc = init_prefixes(stub);
         goto pextr;
 
@@ -7897,17 +7855,15 @@  x86_emulate(
             opc[1] &= 0x38;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
 
         copy_VEX(opc, vex);
         /* Latch MXCSR - we may need to restore it below. */
         invoke_stub("stmxcsr %[mxcsr]", "",
-                    "=m" (*mmvalp), "+m" (fic.exn_raised), [mxcsr] "=m" (mxcsr)
-                    : "a" (mmvalp));
+                    "=m" (*mmvalp), [mxcsr] "=m" (mxcsr) : "a" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
 
         if ( ea.type == OP_MEM )
         {
@@ -7926,7 +7882,7 @@  x86_emulate(
     case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
         host_and_vcpu_must_have(sse4_1);
-        get_fpu(X86EMUL_FPU_xmm, &fic);
+        get_fpu(X86EMUL_FPU_xmm);
         memcpy(mmvalp, &src.val, op_bytes);
         ea.type = OP_MEM;
         op_bytes = src.bytes;
@@ -8036,13 +7992,13 @@  x86_emulate(
         if ( vex.opcx == vex_none )
         {
             host_and_vcpu_must_have(sse4_2);
-            get_fpu(X86EMUL_FPU_xmm, &fic);
+            get_fpu(X86EMUL_FPU_xmm);
         }
         else
         {
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
-            get_fpu(X86EMUL_FPU_ymm, &fic);
+            get_fpu(X86EMUL_FPU_ymm);
         }
 
         opc = init_prefixes(stub);
@@ -8063,13 +8019,13 @@  x86_emulate(
                 goto done;
         }
         opc[2] = imm1;
-        fic.insn_bytes = PFX_BYTES + 3;
+        insn_bytes = PFX_BYTES + 3;
         opc[3] = 0xc3;
         if ( vex.opcx == vex_none )
         {
             /* Cover for extra prefix byte. */
             --opc;
-            ++fic.insn_bytes;
+            ++insn_bytes;
         }
 
         copy_REX_VEX(opc, rex_prefix, vex);
@@ -8297,7 +8253,7 @@  x86_emulate(
 
         if ( !opc )
             BUG();
-        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+        opc[insn_bytes - PFX_BYTES] = 0xc3;
         copy_REX_VEX(opc, rex_prefix, vex);
 
         if ( ea.type == OP_MEM )
@@ -8374,13 +8330,11 @@  x86_emulate(
         if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
                                       X86EMUL_OPC_ENCODING_MASK)) !=
                     X86EMUL_OPC(0x0f, 0xf7)) )
-            invoke_stub("", "", "+m" (*mmvalp), "+m" (fic.exn_raised)
-                                : "a" (mmvalp));
+            invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
         else
             invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
 
         put_stub(stub);
-        check_xmm_exn(&fic);
     }
 
     switch ( dst.type )
@@ -8423,7 +8377,8 @@  x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, false, state, ctxt, ops);
+    put_fpu(fpu_type, false, state, ctxt, ops);
+    fpu_type = X86EMUL_FPU_none;
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -8447,7 +8402,7 @@  x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
-    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
+    put_fpu(fpu_type, insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -421,12 +421,8 @@  struct x86_emulate_ops
 
     /*
      * get_fpu: Load emulated environment's FPU state onto processor.
-     *  @exn_callback: On any FPU or SIMD exception, pass control to
-     *                 (*exception_callback)(exception_callback_arg, regs).
      */
     int (*get_fpu)(
-        void (*exception_callback)(void *, struct cpu_user_regs *),
-        void *exception_callback_arg,
         enum x86_emulate_fpu_type type,
         struct x86_emulate_ctxt *ctxt);
 
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -196,10 +196,6 @@  struct hvm_vcpu {
 
     struct hvm_vcpu_io  hvm_io;
 
-    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
-    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
-    void *fpu_exception_callback_arg;
-
     /* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
     struct x86_event     inject_event;