
[v2,14/17] x86emul: abstract out XCRn accesses

Message ID 59BABA4D020000780017B3F6@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich Sept. 14, 2017, 3:20 p.m. UTC
Use hooks, just like done for other special purpose registers.

This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
itself as well as adding support for XGETBV emulation.

For now fuzzer reads will obtain the real values (minus the fuzzing of
the hook pointer itself).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base.
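
As an illustration of the hook shape this patch introduces, below is a
minimal standalone C sketch (the toy_ctxt/toy_ops/emulate_xgetbv names are
hypothetical stand-ins, not Xen's real types): the emulator core only ever
calls a read_xcr callback, and an XGETBV-style read splits the returned
64-bit value into EAX:EDX, just as the patch does.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the emulator's interface (hypothetical names). */
#define X86EMUL_OKAY          0
#define X86EMUL_UNHANDLEABLE  1

struct toy_ctxt {
    uint64_t xcr0;   /* the guest's XCR0 as tracked by the backend */
};

struct toy_ops {
    /* Same shape as the read_xcr hook added to x86_emulate_ops. */
    int (*read_xcr)(unsigned int reg, uint64_t *val, struct toy_ctxt *ctxt);
};

/* Backend implementation: only XCR0 is handled in this sketch. */
static int toy_read_xcr(unsigned int reg, uint64_t *val, struct toy_ctxt *ctxt)
{
    if ( reg != 0 )
        return X86EMUL_UNHANDLEABLE;
    *val = ctxt->xcr0;
    return X86EMUL_OKAY;
}

/* XGETBV-style emulation: fetch XCR[ecx] and split it into EAX:EDX. */
static int emulate_xgetbv(const struct toy_ops *ops, struct toy_ctxt *ctxt,
                          uint32_t ecx, uint32_t *eax, uint32_t *edx)
{
    uint64_t val;
    int rc;

    if ( !ops->read_xcr )          /* hook absent: cannot emulate */
        return X86EMUL_UNHANDLEABLE;
    rc = ops->read_xcr(ecx, &val, ctxt);
    if ( rc != X86EMUL_OKAY )
        return rc;
    *eax = (uint32_t)val;          /* low 32 bits to EAX */
    *edx = (uint32_t)(val >> 32);  /* high 32 bits to EDX */
    return X86EMUL_OKAY;
}

int main(void)
{
    struct toy_ctxt ctxt = { .xcr0 = 0x7 };  /* FP | SSE | YMM */
    struct toy_ops ops = { .read_xcr = toy_read_xcr };
    uint32_t eax, edx;

    if ( emulate_xgetbv(&ops, &ctxt, 0, &eax, &edx) == X86EMUL_OKAY )
        printf("XCR0 = %08x:%08x\n", (unsigned)edx, (unsigned)eax);
    return 0;
}

The real patch follows the same split: the emulator core calls only
ops->read_xcr()/ops->write_xcr(), and each environment (HVM, PV privileged-op
emulation, the fuzzer, the test harness) supplies its own backend.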

Comments

Jan Beulich Sept. 14, 2017, 3:25 p.m. UTC | #1
>>> On 14.09.17 at 17:20, <JBeulich@suse.com> wrote:
> Use hooks, just like done for other special purpose registers.
> 
> This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
> itself as well as adding support for XGETBV emulation.
> 
> For now fuzzer reads will obtain the real values (minus the fuzzing of
> the hook pointer itself).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I'm sorry, Paul, I forgot to Cc you on this one.

Jan

> ---
> v2: Re-base.
> 
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -435,6 +435,8 @@ static int fuzz_write_cr(
>      return X86EMUL_OKAY;
>  }
>  
> +#define fuzz_read_xcr emul_test_read_xcr
> +
>  enum {
>      MSRI_IA32_SYSENTER_CS,
>      MSRI_IA32_SYSENTER_ESP,
> @@ -553,6 +555,7 @@ static const struct x86_emulate_ops all_
>      SET(write_io),
>      SET(read_cr),
>      SET(write_cr),
> +    SET(read_xcr),
>      SET(read_msr),
>      SET(write_msr),
>      SET(wbinvd),
> @@ -661,6 +664,7 @@ enum {
>      HOOK_write_cr,
>      HOOK_read_dr,
>      HOOK_write_dr,
> +    HOOK_read_xcr,
>      HOOK_read_msr,
>      HOOK_write_msr,
>      HOOK_wbinvd,
> @@ -705,6 +709,7 @@ static void disable_hooks(struct x86_emu
>      MAYBE_DISABLE_HOOK(write_io);
>      MAYBE_DISABLE_HOOK(read_cr);
>      MAYBE_DISABLE_HOOK(write_cr);
> +    MAYBE_DISABLE_HOOK(read_xcr);
>      MAYBE_DISABLE_HOOK(read_msr);
>      MAYBE_DISABLE_HOOK(write_msr);
>      MAYBE_DISABLE_HOOK(wbinvd);
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -368,6 +368,7 @@ static struct x86_emulate_ops emulops =
>      .read_segment = read_segment,
>      .cpuid      = emul_test_cpuid,
>      .read_cr    = emul_test_read_cr,
> +    .read_xcr   = emul_test_read_xcr,
>      .read_msr   = read_msr,
>      .get_fpu    = emul_test_get_fpu,
>      .put_fpu    = emul_test_put_fpu,
> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -120,6 +120,19 @@ int emul_test_read_cr(
>      return X86EMUL_UNHANDLEABLE;
>  }
>  
> +int emul_test_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    uint32_t lo, hi;
> +
> +    asm ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg) );
> +    *val = lo | ((uint64_t)hi << 32);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  int emul_test_get_fpu(
>      void (*exception_callback)(void *, struct cpu_user_regs *),
>      void *exception_callback_arg,
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -215,6 +215,11 @@ int emul_test_read_cr(
>      unsigned long *val,
>      struct x86_emulate_ctxt *ctxt);
>  
> +int emul_test_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt);
> +
>  int emul_test_get_fpu(
>      void (*exception_callback)(void *, struct cpu_user_regs *),
>      void *exception_callback_arg,
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1655,6 +1655,49 @@ static int hvmemul_write_cr(
>      return rc;
>  }
>  
> +static int hvmemul_read_xcr(
> +    unsigned int reg,
> +    uint64_t *val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    uint32_t lo, hi;
> +
> +    switch ( reg )
> +    {
> +    case 0:
> +        *val = current->arch.xcr0;
> +        return X86EMUL_OKAY;
> +
> +    case 1:
> +        if ( !cpu_has_xgetbv1 )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +
> +    default:
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    asm ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
> +          : "=a" (lo), "=d" (hi) : "c" (reg) );
> +    *val = lo | ((uint64_t)hi << 32);
> +    HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +static int hvmemul_write_xcr(
> +    unsigned int reg,
> +    uint64_t val,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));
> +    if ( likely(handle_xsetbv(reg, val) == 0) )
> +        return X86EMUL_OKAY;
> +
> +    x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
> +    return X86EMUL_EXCEPTION;
> +}
> +
>  static int hvmemul_read_msr(
>      unsigned int reg,
>      uint64_t *val,
> @@ -1703,22 +1746,6 @@ static int hvmemul_get_fpu(
>  {
>      struct vcpu *curr = current;
>  
> -    switch ( type )
> -    {
> -    case X86EMUL_FPU_fpu:
> -    case X86EMUL_FPU_wait:
> -    case X86EMUL_FPU_mmx:
> -    case X86EMUL_FPU_xmm:
> -        break;
> -    case X86EMUL_FPU_ymm:
> -        if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
> -             !(curr->arch.xcr0 & XSTATE_YMM) )
> -            return X86EMUL_UNHANDLEABLE;
> -        break;
> -    default:
> -        return X86EMUL_UNHANDLEABLE;
> -    }
> -
>      if ( !curr->fpu_dirtied )
>          hvm_funcs.fpu_dirty_intercept();
>      else if ( type == X86EMUL_FPU_fpu )
> @@ -1902,6 +1929,8 @@ static const struct x86_emulate_ops hvm_
>      .write_io      = hvmemul_write_io,
>      .read_cr       = hvmemul_read_cr,
>      .write_cr      = hvmemul_write_cr,
> +    .read_xcr      = hvmemul_read_xcr,
> +    .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr,
>      .wbinvd        = hvmemul_wbinvd,
> @@ -1927,6 +1956,8 @@ static const struct x86_emulate_ops hvm_
>      .write_io      = hvmemul_write_io_discard,
>      .read_cr       = hvmemul_read_cr,
>      .write_cr      = hvmemul_write_cr,
> +    .read_xcr      = hvmemul_read_xcr,
> +    .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr_discard,
>      .wbinvd        = hvmemul_wbinvd_discard,
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -36,6 +36,7 @@
>  #include <asm/shared.h>
>  #include <asm/traps.h>
>  #include <asm/x86_emulate.h>
> +#include <asm/xstate.h>
>  
>  #include <xsm/xsm.h>
>  
> @@ -817,6 +818,16 @@ static int write_dr(unsigned int reg, un
>             ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
>  }
>  
> +static int write_xcr(unsigned int reg, uint64_t val,
> +                     struct x86_emulate_ctxt *ctxt)
> +{
> +    if ( likely(handle_xsetbv(reg, val) == 0) )
> +        return X86EMUL_OKAY;
> +
> +    x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
> +    return X86EMUL_EXCEPTION;
> +}
> +
>  static inline uint64_t guest_misc_enable(uint64_t val)
>  {
>      val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
> @@ -1329,6 +1340,7 @@ static const struct x86_emulate_ops priv
>      .write_cr            = write_cr,
>      .read_dr             = read_dr,
>      .write_dr            = write_dr,
> +    .write_xcr           = write_xcr,
>      .read_msr            = read_msr,
>      .write_msr           = write_msr,
>      .cpuid               = pv_emul_cpuid,
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1117,10 +1117,27 @@ static int _get_fpu(
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
> +    uint64_t xcr0;
>      int rc;
>  
>      fail_if(!ops->get_fpu);
>      ASSERT(type != X86EMUL_FPU_none);
> +
> +    if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||
> +         ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
> +        xcr0 = 0;
> +
> +    switch ( type )
> +    {
> +    case X86EMUL_FPU_ymm:
> +        if ( !(xcr0 & XSTATE_SSE) || !(xcr0 & XSTATE_YMM) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
>      rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
>  
>      if ( rc == X86EMUL_OKAY )
> @@ -1648,7 +1665,8 @@ in_protmode(
>  #define EBX 3
>  
>  static bool vcpu_has(
> -    unsigned int eax,
> +    unsigned int leaf,
> +    unsigned int subleaf,
>      unsigned int reg,
>      unsigned int bit,
>      struct x86_emulate_ctxt *ctxt,
> @@ -1658,7 +1676,7 @@ static bool vcpu_has(
>      int rc = X86EMUL_OKAY;
>  
>      fail_if(!ops->cpuid);
> -    rc = ops->cpuid(eax, 0, &res, ctxt);
> +    rc = ops->cpuid(leaf, subleaf, &res, ctxt);
>      if ( rc == X86EMUL_OKAY )
>      {
>          switch ( reg )
> @@ -1677,53 +1695,56 @@ static bool vcpu_has(
>      return rc == X86EMUL_OKAY;
>  }
>  
> -#define vcpu_has_fpu()         vcpu_has(         1, EDX,  0, ctxt, ops)
> -#define vcpu_has_sep()         vcpu_has(         1, EDX, 11, ctxt, ops)
> -#define vcpu_has_cx8()         vcpu_has(         1, EDX,  8, ctxt, ops)
> -#define vcpu_has_cmov()        vcpu_has(         1, EDX, 15, ctxt, ops)
> -#define vcpu_has_clflush()     vcpu_has(         1, EDX, 19, ctxt, ops)
> -#define vcpu_has_mmx()         vcpu_has(         1, EDX, 23, ctxt, ops)
> -#define vcpu_has_sse()         vcpu_has(         1, EDX, 25, ctxt, ops)
> -#define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
> -#define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
> -#define vcpu_has_pclmulqdq()   vcpu_has(         1, ECX,  1, ctxt, ops)
> -#define vcpu_has_ssse3()       vcpu_has(         1, ECX,  9, ctxt, ops)
> -#define vcpu_has_fma()         vcpu_has(         1, ECX, 12, ctxt, ops)
> -#define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
> -#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
> -#define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
> -#define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
> -#define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
> -#define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
> -#define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
> -#define vcpu_has_f16c()        vcpu_has(         1, ECX, 29, ctxt, ops)
> -#define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
> -#define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
> +#define X 0 /* Just for documentation purposes. */
> +
> +#define vcpu_has_fpu()         vcpu_has(         1, X, EDX,  0, ctxt, ops)
> +#define vcpu_has_sep()         vcpu_has(         1, X, EDX, 11, ctxt, ops)
> +#define vcpu_has_cx8()         vcpu_has(         1, X, EDX,  8, ctxt, ops)
> +#define vcpu_has_cmov()        vcpu_has(         1, X, EDX, 15, ctxt, ops)
> +#define vcpu_has_clflush()     vcpu_has(         1, X, EDX, 19, ctxt, ops)
> +#define vcpu_has_mmx()         vcpu_has(         1, X, EDX, 23, ctxt, ops)
> +#define vcpu_has_sse()         vcpu_has(         1, X, EDX, 25, ctxt, ops)
> +#define vcpu_has_sse2()        vcpu_has(         1, X, EDX, 26, ctxt, ops)
> +#define vcpu_has_sse3()        vcpu_has(         1, X, ECX,  0, ctxt, ops)
> +#define vcpu_has_pclmulqdq()   vcpu_has(         1, X, ECX,  1, ctxt, ops)
> +#define vcpu_has_ssse3()       vcpu_has(         1, X, ECX,  9, ctxt, ops)
> +#define vcpu_has_fma()         vcpu_has(         1, X, ECX, 12, ctxt, ops)
> +#define vcpu_has_cx16()        vcpu_has(         1, X, ECX, 13, ctxt, ops)
> +#define vcpu_has_sse4_1()      vcpu_has(         1, X, ECX, 19, ctxt, ops)
> +#define vcpu_has_sse4_2()      vcpu_has(         1, X, ECX, 20, ctxt, ops)
> +#define vcpu_has_movbe()       vcpu_has(         1, X, ECX, 22, ctxt, ops)
> +#define vcpu_has_popcnt()      vcpu_has(         1, X, ECX, 23, ctxt, ops)
> +#define vcpu_has_aesni()       vcpu_has(         1, X, ECX, 25, ctxt, ops)
> +#define vcpu_has_avx()         vcpu_has(         1, X, ECX, 28, ctxt, ops)
> +#define vcpu_has_f16c()        vcpu_has(         1, X, ECX, 29, ctxt, ops)
> +#define vcpu_has_rdrand()      vcpu_has(         1, X, ECX, 30, ctxt, ops)
> +#define vcpu_has_mmxext()     (vcpu_has(0x80000001, X, EDX, 22, ctxt, ops) || \
>                                 vcpu_has_sse())
> -#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, EDX, 30, ctxt, ops)
> -#define vcpu_has_3dnow()       vcpu_has(0x80000001, EDX, 31, ctxt, ops)
> -#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
> -#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
> -#define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
> -#define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
> -#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
> -#define vcpu_has_xop()         vcpu_has(0x80000001, ECX, 12, ctxt, ops)
> -#define vcpu_has_fma4()        vcpu_has(0x80000001, ECX, 16, ctxt, ops)
> -#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
> -#define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
> -#define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
> -#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
> -#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
> -#define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
> -#define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
> -#define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
> -#define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
> -#define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
> -#define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
> -#define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
> -#define vcpu_has_sha()         vcpu_has(         7, EBX, 29, ctxt, ops)
> -#define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
> -#define vcpu_has_clzero()      vcpu_has(0x80000008, EBX,  0, ctxt, ops)
> +#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, X, EDX, 30, ctxt, ops)
> +#define vcpu_has_3dnow()       vcpu_has(0x80000001, X, EDX, 31, ctxt, ops)
> +#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, X, ECX,  0, ctxt, ops)
> +#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, X, ECX,  4, ctxt, ops)
> +#define vcpu_has_lzcnt()       vcpu_has(0x80000001, X, ECX,  5, ctxt, ops)
> +#define vcpu_has_sse4a()       vcpu_has(0x80000001, X, ECX,  6, ctxt, ops)
> +#define vcpu_has_misalignsse() vcpu_has(0x80000001, X, ECX,  7, ctxt, ops)
> +#define vcpu_has_xop()         vcpu_has(0x80000001, X, ECX, 12, ctxt, ops)
> +#define vcpu_has_fma4()        vcpu_has(0x80000001, X, ECX, 16, ctxt, ops)
> +#define vcpu_has_tbm()         vcpu_has(0x80000001, X, ECX, 21, ctxt, ops)
> +#define vcpu_has_bmi1()        vcpu_has(         7, 0, EBX,  3, ctxt, ops)
> +#define vcpu_has_hle()         vcpu_has(         7, 0, EBX,  4, ctxt, ops)
> +#define vcpu_has_avx2()        vcpu_has(         7, 0, EBX,  5, ctxt, ops)
> +#define vcpu_has_bmi2()        vcpu_has(         7, 0, EBX,  8, ctxt, ops)
> +#define vcpu_has_rtm()         vcpu_has(         7, 0, EBX, 11, ctxt, ops)
> +#define vcpu_has_mpx()         vcpu_has(         7, 0, EBX, 14, ctxt, ops)
> +#define vcpu_has_rdseed()      vcpu_has(         7, 0, EBX, 18, ctxt, ops)
> +#define vcpu_has_adx()         vcpu_has(         7, 0, EBX, 19, ctxt, ops)
> +#define vcpu_has_smap()        vcpu_has(         7, 0, EBX, 20, ctxt, ops)
> +#define vcpu_has_clflushopt()  vcpu_has(         7, 0, EBX, 23, ctxt, ops)
> +#define vcpu_has_clwb()        vcpu_has(         7, 0, EBX, 24, ctxt, ops)
> +#define vcpu_has_sha()         vcpu_has(         7, 0, EBX, 29, ctxt, ops)
> +#define vcpu_has_rdpid()       vcpu_has(         7, 0, ECX, 22, ctxt, ops)
> +#define vcpu_has_xgetbv1()     vcpu_has(       0xd, 1, EAX,  2, ctxt, ops)
> +#define vcpu_has_clzero()      vcpu_has(0x80000008, X, EBX,  0, ctxt, ops)
>  
>  #define vcpu_must_have(feat) \
>      generate_exception_if(!vcpu_has_##feat(), EXC_UD)
> @@ -5158,18 +5179,33 @@ x86_emulate(
>                  _regs.eflags |= X86_EFLAGS_AC;
>              break;
>  
> -#ifdef __XEN__
> -        case 0xd1: /* xsetbv */
> +        case 0xd0: /* xgetbv */
>              generate_exception_if(vex.pfx, EXC_UD);
> -            if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
> +            if ( !ops->read_cr || !ops->read_xcr ||
> +                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
>                  cr4 = 0;
>              generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
> -            generate_exception_if(!mode_ring0() ||
> -                                  handle_xsetbv(_regs.ecx,
> -                                                _regs.eax | (_regs.rdx << 32)),
> +            generate_exception_if(_regs.ecx > (vcpu_has_xgetbv1() ? 1 : 0),
>                                    EXC_GP, 0);
> +            rc = ops->read_xcr(_regs.ecx, &msr_val, ctxt);
> +            if ( rc != X86EMUL_OKAY )
> +                goto done;
> +            _regs.r(ax) = (uint32_t)msr_val;
> +            _regs.r(dx) = msr_val >> 32;
> +            break;
> +
> +        case 0xd1: /* xsetbv */
> +            generate_exception_if(vex.pfx, EXC_UD);
> +            if ( !ops->read_cr || !ops->write_xcr ||
> +                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
> +                cr4 = 0;
> +            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
> +            generate_exception_if(!mode_ring0() || _regs.ecx, EXC_GP, 0);
> +            rc = ops->write_xcr(_regs.ecx,
> +                                _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);
> +            if ( rc != X86EMUL_OKAY )
> +                goto done;
>              break;
> -#endif
>  
>          case 0xd4: /* vmfunc */
>              generate_exception_if(vex.pfx, EXC_UD);
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -373,6 +373,24 @@ struct x86_emulate_ops
>          struct x86_emulate_ctxt *ctxt);
>  
>      /*
> +     * read_xcr: Read from extended control register.
> +     *  @reg:   [IN ] Register to read.
> +     */
> +    int (*read_xcr)(
> +        unsigned int reg,
> +        uint64_t *val,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
> +     * write_xcr: Write to extended control register.
> +     *  @reg:   [IN ] Register to write.
> +     */
> +    int (*write_xcr)(
> +        unsigned int reg,
> +        uint64_t val,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
>       * read_msr: Read from model-specific register.
>       *  @reg:   [IN ] Register to read.
>       */
> --- a/xen/include/asm-x86/hvm/trace.h
> +++ b/xen/include/asm-x86/hvm/trace.h
> @@ -33,6 +33,8 @@
>  #define DO_TRC_HVM_CR_WRITE64  DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_DR_READ     DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_DR_WRITE    DEFAULT_HVM_REGACCESS
> +#define DO_TRC_HVM_XCR_READ64  DEFAULT_HVM_REGACCESS
> +#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_MSR_READ    DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_MSR_WRITE   DEFAULT_HVM_REGACCESS
>  #define DO_TRC_HVM_RDTSC       DEFAULT_HVM_REGACCESS
> --- a/xen/include/asm-x86/x86-defns.h
> +++ b/xen/include/asm-x86/x86-defns.h
> @@ -66,4 +66,28 @@
>  #define X86_CR4_SMAP       0x00200000 /* enable SMAP */
>  #define X86_CR4_PKE        0x00400000 /* enable PKE */
>  
> +/*
> + * XSTATE component flags in XCR0
> + */
> +#define _XSTATE_FP                0
> +#define XSTATE_FP                 (1ULL << _XSTATE_FP)
> +#define _XSTATE_SSE               1
> +#define XSTATE_SSE                (1ULL << _XSTATE_SSE)
> +#define _XSTATE_YMM               2
> +#define XSTATE_YMM                (1ULL << _XSTATE_YMM)
> +#define _XSTATE_BNDREGS           3
> +#define XSTATE_BNDREGS            (1ULL << _XSTATE_BNDREGS)
> +#define _XSTATE_BNDCSR            4
> +#define XSTATE_BNDCSR             (1ULL << _XSTATE_BNDCSR)
> +#define _XSTATE_OPMASK            5
> +#define XSTATE_OPMASK             (1ULL << _XSTATE_OPMASK)
> +#define _XSTATE_ZMM               6
> +#define XSTATE_ZMM                (1ULL << _XSTATE_ZMM)
> +#define _XSTATE_HI_ZMM            7
> +#define XSTATE_HI_ZMM             (1ULL << _XSTATE_HI_ZMM)
> +#define _XSTATE_PKRU              9
> +#define XSTATE_PKRU               (1ULL << _XSTATE_PKRU)
> +#define _XSTATE_LWP               62
> +#define XSTATE_LWP                (1ULL << _XSTATE_LWP)
> +
>  #endif	/* __XEN_X86_DEFNS_H__ */
> --- a/xen/include/asm-x86/xstate.h
> +++ b/xen/include/asm-x86/xstate.h
> @@ -10,6 +10,7 @@
>  
>  #include <xen/sched.h>
>  #include <asm/cpufeature.h>
> +#include <asm/x86-defns.h>
>  
>  #define FCW_DEFAULT               0x037f
>  #define FCW_RESET                 0x0040
> @@ -28,27 +29,6 @@ extern uint32_t mxcsr_mask;
>  #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
>  #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)
>  
> -#define _XSTATE_FP                0
> -#define XSTATE_FP                 (1ULL << _XSTATE_FP)
> -#define _XSTATE_SSE               1
> -#define XSTATE_SSE                (1ULL << _XSTATE_SSE)
> -#define _XSTATE_YMM               2
> -#define XSTATE_YMM                (1ULL << _XSTATE_YMM)
> -#define _XSTATE_BNDREGS           3
> -#define XSTATE_BNDREGS            (1ULL << _XSTATE_BNDREGS)
> -#define _XSTATE_BNDCSR            4
> -#define XSTATE_BNDCSR             (1ULL << _XSTATE_BNDCSR)
> -#define _XSTATE_OPMASK            5
> -#define XSTATE_OPMASK             (1ULL << _XSTATE_OPMASK)
> -#define _XSTATE_ZMM               6
> -#define XSTATE_ZMM                (1ULL << _XSTATE_ZMM)
> -#define _XSTATE_HI_ZMM            7
> -#define XSTATE_HI_ZMM             (1ULL << _XSTATE_HI_ZMM)
> -#define _XSTATE_PKRU              9
> -#define XSTATE_PKRU               (1ULL << _XSTATE_PKRU)
> -#define _XSTATE_LWP               62
> -#define XSTATE_LWP                (1ULL << _XSTATE_LWP)
> -
>  #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
>  #define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \
>                          XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY)
> --- a/xen/include/public/trace.h
> +++ b/xen/include/public/trace.h
> @@ -235,6 +235,8 @@
>  #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
>  #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
>  #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
> +#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
> +#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
>  
>  #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
>  #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
Paul Durrant Sept. 18, 2017, 9:12 a.m. UTC | #2

> -----Original Message-----
> From: Xen-devel [mailto:xen-devel-bounces@lists.xen.org] On Behalf Of Jan Beulich
> Sent: 14 September 2017 16:20
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: George Dunlap <George.Dunlap@citrix.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>
> Subject: [Xen-devel] [PATCH v2 14/17] x86emul: abstract out XCRn accesses
> 
> Use hooks, just like done for other special purpose registers.
> 
> This includes moving XCR0 checks from hvmemul_get_fpu() to the emulator
> itself as well as adding support for XGETBV emulation.
> 
> For now fuzzer reads will obtain the real values (minus the fuzzing of
> the hook pointer itself).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

Patch

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -435,6 +435,8 @@  static int fuzz_write_cr(
     return X86EMUL_OKAY;
 }
 
+#define fuzz_read_xcr emul_test_read_xcr
+
 enum {
     MSRI_IA32_SYSENTER_CS,
     MSRI_IA32_SYSENTER_ESP,
@@ -553,6 +555,7 @@  static const struct x86_emulate_ops all_
     SET(write_io),
     SET(read_cr),
     SET(write_cr),
+    SET(read_xcr),
     SET(read_msr),
     SET(write_msr),
     SET(wbinvd),
@@ -661,6 +664,7 @@  enum {
     HOOK_write_cr,
     HOOK_read_dr,
     HOOK_write_dr,
+    HOOK_read_xcr,
     HOOK_read_msr,
     HOOK_write_msr,
     HOOK_wbinvd,
@@ -705,6 +709,7 @@  static void disable_hooks(struct x86_emu
     MAYBE_DISABLE_HOOK(write_io);
     MAYBE_DISABLE_HOOK(read_cr);
     MAYBE_DISABLE_HOOK(write_cr);
+    MAYBE_DISABLE_HOOK(read_xcr);
     MAYBE_DISABLE_HOOK(read_msr);
     MAYBE_DISABLE_HOOK(write_msr);
     MAYBE_DISABLE_HOOK(wbinvd);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -368,6 +368,7 @@  static struct x86_emulate_ops emulops =
     .read_segment = read_segment,
     .cpuid      = emul_test_cpuid,
     .read_cr    = emul_test_read_cr,
+    .read_xcr   = emul_test_read_xcr,
     .read_msr   = read_msr,
     .get_fpu    = emul_test_get_fpu,
     .put_fpu    = emul_test_put_fpu,
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -120,6 +120,19 @@  int emul_test_read_cr(
     return X86EMUL_UNHANDLEABLE;
 }
 
+int emul_test_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    uint32_t lo, hi;
+
+    asm ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg) );
+    *val = lo | ((uint64_t)hi << 32);
+
+    return X86EMUL_OKAY;
+}
+
 int emul_test_get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -215,6 +215,11 @@  int emul_test_read_cr(
     unsigned long *val,
     struct x86_emulate_ctxt *ctxt);
 
+int emul_test_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt);
+
 int emul_test_get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1655,6 +1655,49 @@  static int hvmemul_write_cr(
     return rc;
 }
 
+static int hvmemul_read_xcr(
+    unsigned int reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    uint32_t lo, hi;
+
+    switch ( reg )
+    {
+    case 0:
+        *val = current->arch.xcr0;
+        return X86EMUL_OKAY;
+
+    case 1:
+        if ( !cpu_has_xgetbv1 )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
+    default:
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    asm ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
+          : "=a" (lo), "=d" (hi) : "c" (reg) );
+    *val = lo | ((uint64_t)hi << 32);
+    HVMTRACE_LONG_2D(XCR_READ, reg, TRC_PAR_LONG(*val));
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_xcr(
+    unsigned int reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    HVMTRACE_LONG_2D(XCR_WRITE, reg, TRC_PAR_LONG(val));
+    if ( likely(handle_xsetbv(reg, val) == 0) )
+        return X86EMUL_OKAY;
+
+    x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+    return X86EMUL_EXCEPTION;
+}
+
 static int hvmemul_read_msr(
     unsigned int reg,
     uint64_t *val,
@@ -1703,22 +1746,6 @@  static int hvmemul_get_fpu(
 {
     struct vcpu *curr = current;
 
-    switch ( type )
-    {
-    case X86EMUL_FPU_fpu:
-    case X86EMUL_FPU_wait:
-    case X86EMUL_FPU_mmx:
-    case X86EMUL_FPU_xmm:
-        break;
-    case X86EMUL_FPU_ymm:
-        if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
-             !(curr->arch.xcr0 & XSTATE_YMM) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
     if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
     else if ( type == X86EMUL_FPU_fpu )
@@ -1902,6 +1929,8 @@  static const struct x86_emulate_ops hvm_
     .write_io      = hvmemul_write_io,
     .read_cr       = hvmemul_read_cr,
     .write_cr      = hvmemul_write_cr,
+    .read_xcr      = hvmemul_read_xcr,
+    .write_xcr     = hvmemul_write_xcr,
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr,
     .wbinvd        = hvmemul_wbinvd,
@@ -1927,6 +1956,8 @@  static const struct x86_emulate_ops hvm_
     .write_io      = hvmemul_write_io_discard,
     .read_cr       = hvmemul_read_cr,
     .write_cr      = hvmemul_write_cr,
+    .read_xcr      = hvmemul_read_xcr,
+    .write_xcr     = hvmemul_write_xcr,
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr_discard,
     .wbinvd        = hvmemul_wbinvd_discard,
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -36,6 +36,7 @@ 
 #include <asm/shared.h>
 #include <asm/traps.h>
 #include <asm/x86_emulate.h>
+#include <asm/xstate.h>
 
 #include <xsm/xsm.h>
 
@@ -817,6 +818,16 @@  static int write_dr(unsigned int reg, un
            ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
 }
 
+static int write_xcr(unsigned int reg, uint64_t val,
+                     struct x86_emulate_ctxt *ctxt)
+{
+    if ( likely(handle_xsetbv(reg, val) == 0) )
+        return X86EMUL_OKAY;
+
+    x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
+    return X86EMUL_EXCEPTION;
+}
+
 static inline uint64_t guest_misc_enable(uint64_t val)
 {
     val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
@@ -1329,6 +1340,7 @@  static const struct x86_emulate_ops priv
     .write_cr            = write_cr,
     .read_dr             = read_dr,
     .write_dr            = write_dr,
+    .write_xcr           = write_xcr,
     .read_msr            = read_msr,
     .write_msr           = write_msr,
     .cpuid               = pv_emul_cpuid,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1117,10 +1117,27 @@  static int _get_fpu(
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
+    uint64_t xcr0;
     int rc;
 
     fail_if(!ops->get_fpu);
     ASSERT(type != X86EMUL_FPU_none);
+
+    if ( type < X86EMUL_FPU_ymm || !ops->read_xcr ||
+         ops->read_xcr(0, &xcr0, ctxt) != X86EMUL_OKAY )
+        xcr0 = 0;
+
+    switch ( type )
+    {
+    case X86EMUL_FPU_ymm:
+        if ( !(xcr0 & XSTATE_SSE) || !(xcr0 & XSTATE_YMM) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
+    default:
+        break;
+    }
+
     rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
 
     if ( rc == X86EMUL_OKAY )
@@ -1648,7 +1665,8 @@  in_protmode(
 #define EBX 3
 
 static bool vcpu_has(
-    unsigned int eax,
+    unsigned int leaf,
+    unsigned int subleaf,
     unsigned int reg,
     unsigned int bit,
     struct x86_emulate_ctxt *ctxt,
@@ -1658,7 +1676,7 @@  static bool vcpu_has(
     int rc = X86EMUL_OKAY;
 
     fail_if(!ops->cpuid);
-    rc = ops->cpuid(eax, 0, &res, ctxt);
+    rc = ops->cpuid(leaf, subleaf, &res, ctxt);
     if ( rc == X86EMUL_OKAY )
     {
         switch ( reg )
@@ -1677,53 +1695,56 @@  static bool vcpu_has(
     return rc == X86EMUL_OKAY;
 }
 
-#define vcpu_has_fpu()         vcpu_has(         1, EDX,  0, ctxt, ops)
-#define vcpu_has_sep()         vcpu_has(         1, EDX, 11, ctxt, ops)
-#define vcpu_has_cx8()         vcpu_has(         1, EDX,  8, ctxt, ops)
-#define vcpu_has_cmov()        vcpu_has(         1, EDX, 15, ctxt, ops)
-#define vcpu_has_clflush()     vcpu_has(         1, EDX, 19, ctxt, ops)
-#define vcpu_has_mmx()         vcpu_has(         1, EDX, 23, ctxt, ops)
-#define vcpu_has_sse()         vcpu_has(         1, EDX, 25, ctxt, ops)
-#define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
-#define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
-#define vcpu_has_pclmulqdq()   vcpu_has(         1, ECX,  1, ctxt, ops)
-#define vcpu_has_ssse3()       vcpu_has(         1, ECX,  9, ctxt, ops)
-#define vcpu_has_fma()         vcpu_has(         1, ECX, 12, ctxt, ops)
-#define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
-#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
-#define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
-#define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
-#define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
-#define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
-#define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
-#define vcpu_has_f16c()        vcpu_has(         1, ECX, 29, ctxt, ops)
-#define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
-#define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
+#define X 0 /* Just for documentation purposes. */
+
+#define vcpu_has_fpu()         vcpu_has(         1, X, EDX,  0, ctxt, ops)
+#define vcpu_has_sep()         vcpu_has(         1, X, EDX, 11, ctxt, ops)
+#define vcpu_has_cx8()         vcpu_has(         1, X, EDX,  8, ctxt, ops)
+#define vcpu_has_cmov()        vcpu_has(         1, X, EDX, 15, ctxt, ops)
+#define vcpu_has_clflush()     vcpu_has(         1, X, EDX, 19, ctxt, ops)
+#define vcpu_has_mmx()         vcpu_has(         1, X, EDX, 23, ctxt, ops)
+#define vcpu_has_sse()         vcpu_has(         1, X, EDX, 25, ctxt, ops)
+#define vcpu_has_sse2()        vcpu_has(         1, X, EDX, 26, ctxt, ops)
+#define vcpu_has_sse3()        vcpu_has(         1, X, ECX,  0, ctxt, ops)
+#define vcpu_has_pclmulqdq()   vcpu_has(         1, X, ECX,  1, ctxt, ops)
+#define vcpu_has_ssse3()       vcpu_has(         1, X, ECX,  9, ctxt, ops)
+#define vcpu_has_fma()         vcpu_has(         1, X, ECX, 12, ctxt, ops)
+#define vcpu_has_cx16()        vcpu_has(         1, X, ECX, 13, ctxt, ops)
+#define vcpu_has_sse4_1()      vcpu_has(         1, X, ECX, 19, ctxt, ops)
+#define vcpu_has_sse4_2()      vcpu_has(         1, X, ECX, 20, ctxt, ops)
+#define vcpu_has_movbe()       vcpu_has(         1, X, ECX, 22, ctxt, ops)
+#define vcpu_has_popcnt()      vcpu_has(         1, X, ECX, 23, ctxt, ops)
+#define vcpu_has_aesni()       vcpu_has(         1, X, ECX, 25, ctxt, ops)
+#define vcpu_has_avx()         vcpu_has(         1, X, ECX, 28, ctxt, ops)
+#define vcpu_has_f16c()        vcpu_has(         1, X, ECX, 29, ctxt, ops)
+#define vcpu_has_rdrand()      vcpu_has(         1, X, ECX, 30, ctxt, ops)
+#define vcpu_has_mmxext()     (vcpu_has(0x80000001, X, EDX, 22, ctxt, ops) || \
                                vcpu_has_sse())
-#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, EDX, 30, ctxt, ops)
-#define vcpu_has_3dnow()       vcpu_has(0x80000001, EDX, 31, ctxt, ops)
-#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
-#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
-#define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
-#define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
-#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
-#define vcpu_has_xop()         vcpu_has(0x80000001, ECX, 12, ctxt, ops)
-#define vcpu_has_fma4()        vcpu_has(0x80000001, ECX, 16, ctxt, ops)
-#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
-#define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
-#define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
-#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
-#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
-#define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
-#define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
-#define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
-#define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
-#define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
-#define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
-#define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
-#define vcpu_has_sha()         vcpu_has(         7, EBX, 29, ctxt, ops)
-#define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
-#define vcpu_has_clzero()      vcpu_has(0x80000008, EBX,  0, ctxt, ops)
+#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, X, EDX, 30, ctxt, ops)
+#define vcpu_has_3dnow()       vcpu_has(0x80000001, X, EDX, 31, ctxt, ops)
+#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, X, ECX,  0, ctxt, ops)
+#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, X, ECX,  4, ctxt, ops)
+#define vcpu_has_lzcnt()       vcpu_has(0x80000001, X, ECX,  5, ctxt, ops)
+#define vcpu_has_sse4a()       vcpu_has(0x80000001, X, ECX,  6, ctxt, ops)
+#define vcpu_has_misalignsse() vcpu_has(0x80000001, X, ECX,  7, ctxt, ops)
+#define vcpu_has_xop()         vcpu_has(0x80000001, X, ECX, 12, ctxt, ops)
+#define vcpu_has_fma4()        vcpu_has(0x80000001, X, ECX, 16, ctxt, ops)
+#define vcpu_has_tbm()         vcpu_has(0x80000001, X, ECX, 21, ctxt, ops)
+#define vcpu_has_bmi1()        vcpu_has(         7, 0, EBX,  3, ctxt, ops)
+#define vcpu_has_hle()         vcpu_has(         7, 0, EBX,  4, ctxt, ops)
+#define vcpu_has_avx2()        vcpu_has(         7, 0, EBX,  5, ctxt, ops)
+#define vcpu_has_bmi2()        vcpu_has(         7, 0, EBX,  8, ctxt, ops)
+#define vcpu_has_rtm()         vcpu_has(         7, 0, EBX, 11, ctxt, ops)
+#define vcpu_has_mpx()         vcpu_has(         7, 0, EBX, 14, ctxt, ops)
+#define vcpu_has_rdseed()      vcpu_has(         7, 0, EBX, 18, ctxt, ops)
+#define vcpu_has_adx()         vcpu_has(         7, 0, EBX, 19, ctxt, ops)
+#define vcpu_has_smap()        vcpu_has(         7, 0, EBX, 20, ctxt, ops)
+#define vcpu_has_clflushopt()  vcpu_has(         7, 0, EBX, 23, ctxt, ops)
+#define vcpu_has_clwb()        vcpu_has(         7, 0, EBX, 24, ctxt, ops)
+#define vcpu_has_sha()         vcpu_has(         7, 0, EBX, 29, ctxt, ops)
+#define vcpu_has_rdpid()       vcpu_has(         7, 0, ECX, 22, ctxt, ops)
+#define vcpu_has_xgetbv1()     vcpu_has(       0xd, 1, EAX,  2, ctxt, ops)
+#define vcpu_has_clzero()      vcpu_has(0x80000008, X, EBX,  0, ctxt, ops)
 
 #define vcpu_must_have(feat) \
     generate_exception_if(!vcpu_has_##feat(), EXC_UD)
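
As a standalone aside (not part of the patch): vcpu_has_xgetbv1() above
asks for CPUID.0xD.1:EAX bit 2, the first user of the new subleaf
argument that actually needs a non-zero value. A native build could
probe the same bit like so:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Probe CPUID.0xD.1:EAX bit 2 (XGETBV1), mirroring vcpu_has_xgetbv1(). */
static bool cpu_has_xgetbv1(void)
{
    uint32_t eax, ebx, ecx, edx;

    asm ( "cpuid"
          : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
          : "a" (0xd), "c" (1) );

    return eax & (1u << 2);
}

int main(void)
{
    printf("XGETBV1 %ssupported\n", cpu_has_xgetbv1() ? "" : "not ");
    return 0;
}
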
@@ -5158,18 +5179,33 @@ x86_emulate(
                 _regs.eflags |= X86_EFLAGS_AC;
             break;
 
-#ifdef __XEN__
-        case 0xd1: /* xsetbv */
+        case 0xd0: /* xgetbv */
             generate_exception_if(vex.pfx, EXC_UD);
-            if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+            if ( !ops->read_cr || !ops->read_xcr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
                 cr4 = 0;
             generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
-            generate_exception_if(!mode_ring0() ||
-                                  handle_xsetbv(_regs.ecx,
-                                                _regs.eax | (_regs.rdx << 32)),
+            generate_exception_if(_regs.ecx > (vcpu_has_xgetbv1() ? 1 : 0),
                                   EXC_GP, 0);
+            rc = ops->read_xcr(_regs.ecx, &msr_val, ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+            _regs.r(ax) = (uint32_t)msr_val;
+            _regs.r(dx) = msr_val >> 32;
+            break;
+
+        case 0xd1: /* xsetbv */
+            generate_exception_if(vex.pfx, EXC_UD);
+            if ( !ops->read_cr || !ops->write_xcr ||
+                 ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+                cr4 = 0;
+            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
+            generate_exception_if(!mode_ring0() || _regs.ecx, EXC_GP, 0);
+            rc = ops->write_xcr(_regs.ecx,
+                                _regs.eax | ((uint64_t)_regs.edx << 32), ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
             break;
-#endif
 
         case 0xd4: /* vmfunc */
             generate_exception_if(vex.pfx, EXC_UD);
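
One detail of the xsetbv operand assembly above is worth spelling out:
_regs.edx is 32 bits wide, and shifting a 32-bit value left by its own
width is undefined in C, so the widening cast has to come first. A
minimal standalone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t eax = 0x00000007, edx = 0x00000001;

    /* Undefined behaviour: a 32-bit value shifted by its own width. */
    /* uint64_t bad = eax | (edx << 32); */

    /* Correct, as in the xsetbv case: widen edx before shifting. */
    uint64_t val = eax | ((uint64_t)edx << 32);

    printf("%#llx\n", (unsigned long long)val); /* 0x100000007 */
    return 0;
}
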
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -373,6 +373,24 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 
     /*
+     * read_xcr: Read from extended control register.
+     *  @reg:   [IN ] Register to read.
+     */
+    int (*read_xcr)(
+        unsigned int reg,
+        uint64_t *val,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * write_xcr: Write to extended control register.
+     *  @reg:   [IN ] Register to write.
+     */
+    int (*write_xcr)(
+        unsigned int reg,
+        uint64_t val,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
      * read_msr: Read from model-specific register.
      *  @reg:   [IN ] Register to read.
      */
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -33,6 +33,8 @@ 
 #define DO_TRC_HVM_CR_WRITE64  DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_DR_READ     DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_DR_WRITE    DEFAULT_HVM_REGACCESS
+#define DO_TRC_HVM_XCR_READ64  DEFAULT_HVM_REGACCESS
+#define DO_TRC_HVM_XCR_WRITE64 DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_MSR_READ    DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_MSR_WRITE   DEFAULT_HVM_REGACCESS
 #define DO_TRC_HVM_RDTSC       DEFAULT_HVM_REGACCESS
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -66,4 +66,28 @@ 
 #define X86_CR4_SMAP       0x00200000 /* enable SMAP */
 #define X86_CR4_PKE        0x00400000 /* enable PKE */
 
+/*
+ * XSTATE component flags in XCR0
+ */
+#define _XSTATE_FP                0
+#define XSTATE_FP                 (1ULL << _XSTATE_FP)
+#define _XSTATE_SSE               1
+#define XSTATE_SSE                (1ULL << _XSTATE_SSE)
+#define _XSTATE_YMM               2
+#define XSTATE_YMM                (1ULL << _XSTATE_YMM)
+#define _XSTATE_BNDREGS           3
+#define XSTATE_BNDREGS            (1ULL << _XSTATE_BNDREGS)
+#define _XSTATE_BNDCSR            4
+#define XSTATE_BNDCSR             (1ULL << _XSTATE_BNDCSR)
+#define _XSTATE_OPMASK            5
+#define XSTATE_OPMASK             (1ULL << _XSTATE_OPMASK)
+#define _XSTATE_ZMM               6
+#define XSTATE_ZMM                (1ULL << _XSTATE_ZMM)
+#define _XSTATE_HI_ZMM            7
+#define XSTATE_HI_ZMM             (1ULL << _XSTATE_HI_ZMM)
+#define _XSTATE_PKRU              9
+#define XSTATE_PKRU               (1ULL << _XSTATE_PKRU)
+#define _XSTATE_LWP               62
+#define XSTATE_LWP                (1ULL << _XSTATE_LWP)
+
 #endif	/* __XEN_X86_DEFNS_H__ */
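
The relocated flags also make it easy to express the classic
consistency rules an XCR0 write must obey (hardware XSETBV raises
#GP(0) when they are violated). A sketch using the definitions above;
the rules themselves are architectural facts, not logic introduced by
this patch:

#include <stdbool.h>
#include <stdint.h>

/* Assumes the XSTATE_* definitions from x86-defns.h above. */
static bool xcr0_consistent(uint64_t xcr0)
{
    /* x87 state can never be disabled. */
    if ( !(xcr0 & XSTATE_FP) )
        return false;

    /* AVX (YMM) state requires SSE state. */
    if ( (xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE) )
        return false;

    /* The two MPX components are enabled and disabled together. */
    if ( !(xcr0 & XSTATE_BNDREGS) != !(xcr0 & XSTATE_BNDCSR) )
        return false;

    return true;
}
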
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -10,6 +10,7 @@ 
 
 #include <xen/sched.h>
 #include <asm/cpufeature.h>
+#include <asm/x86-defns.h>
 
 #define FCW_DEFAULT               0x037f
 #define FCW_RESET                 0x0040
@@ -28,27 +29,6 @@ extern uint32_t mxcsr_mask;
 #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
 #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)
 
-#define _XSTATE_FP                0
-#define XSTATE_FP                 (1ULL << _XSTATE_FP)
-#define _XSTATE_SSE               1
-#define XSTATE_SSE                (1ULL << _XSTATE_SSE)
-#define _XSTATE_YMM               2
-#define XSTATE_YMM                (1ULL << _XSTATE_YMM)
-#define _XSTATE_BNDREGS           3
-#define XSTATE_BNDREGS            (1ULL << _XSTATE_BNDREGS)
-#define _XSTATE_BNDCSR            4
-#define XSTATE_BNDCSR             (1ULL << _XSTATE_BNDCSR)
-#define _XSTATE_OPMASK            5
-#define XSTATE_OPMASK             (1ULL << _XSTATE_OPMASK)
-#define _XSTATE_ZMM               6
-#define XSTATE_ZMM                (1ULL << _XSTATE_ZMM)
-#define _XSTATE_HI_ZMM            7
-#define XSTATE_HI_ZMM             (1ULL << _XSTATE_HI_ZMM)
-#define _XSTATE_PKRU              9
-#define XSTATE_PKRU               (1ULL << _XSTATE_PKRU)
-#define _XSTATE_LWP               62
-#define XSTATE_LWP                (1ULL << _XSTATE_LWP)
-
 #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
 #define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \
                         XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY)
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -235,6 +235,8 @@ 
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
+#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
 
 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
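
For reference, the new record numbers decode per the header's existing
scheme, with TRC_64_FLAG marking events whose payload carries 64-bit
values. Illustrative arithmetic only; the two base constants below are
quoted from elsewhere in public/trace.h and are assumptions as far as
this patch is concerned:

#include <stdio.h>

#define TRC_HVM_HANDLER 0x00082000u /* assumed from public/trace.h */
#define TRC_64_FLAG     0x100u      /* assumed from public/trace.h */

int main(void)
{
    printf("TRC_HVM_XCR_READ64  = %#x\n",
           TRC_HVM_HANDLER + TRC_64_FLAG + 0x26); /* 0x82126 */
    printf("TRC_HVM_XCR_WRITE64 = %#x\n",
           TRC_HVM_HANDLER + TRC_64_FLAG + 0x27); /* 0x82127 */
    return 0;
}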