[04/16] target-i386: Add XSAVE extension
diff mbox

Message ID 1455039832-9133-5-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Feb. 9, 2016, 5:43 p.m. UTC
This includes XSAVE, XRSTOR, XGETBV, XSETBV, which are all related,
as well as the associate cpuid bits.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/cpu.c        |  39 +++++++------
 target-i386/fpu_helper.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/helper.c     |  12 ++--
 target-i386/helper.h     |   4 ++
 target-i386/kvm.c        |  11 ++--
 target-i386/translate.c  |  54 ++++++++++++++++++
 6 files changed, 241 insertions(+), 24 deletions(-)

Patch
diff mbox

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 3fa14bf..fb8a646 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -331,14 +331,14 @@  static const char *cpuid_6_feature_name[] = {
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
           CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
           CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+          CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */   \
           CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
           CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
           CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
-          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
-          CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
-          CPUID_EXT_RDRAND */
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
+          CPUID_EXT_F16C, CPUID_EXT_RDRAND */
 
 #ifdef TARGET_X86_64
 #define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@@ -2323,10 +2323,13 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         *ebx = (cpu->apic_id << 24) |
                8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
         *ecx = env->features[FEAT_1_ECX];
+        if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
+            *ecx |= CPUID_EXT_OSXSAVE;
+        }
         *edx = env->features[FEAT_1_EDX];
         if (cs->nr_cores * cs->nr_threads > 1) {
             *ebx |= (cs->nr_cores * cs->nr_threads) << 16;
-            *edx |= 1 << 28;    /* HTT bit */
+            *edx |= CPUID_HT;
         }
         break;
     case 2:
@@ -2450,7 +2453,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0xD: {
         KVMState *s = cs->kvm_state;
-        uint64_t kvm_mask;
+        uint64_t ena_mask;
         int i;
 
         /* Processor Extended State */
@@ -2458,35 +2461,39 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         *ebx = 0;
         *ecx = 0;
         *edx = 0;
-        if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) {
+        if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
             break;
         }
-        kvm_mask =
-            kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) |
-            ((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32);
+        if (kvm_enabled()) {
+            ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
+            ena_mask <<= 32;
+            ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+        } else {
+            ena_mask = -1;
+        }
 
         if (count == 0) {
             *ecx = 0x240;
             for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) {
                 const ExtSaveArea *esa = &ext_save_areas[i];
-                if ((env->features[esa->feature] & esa->bits) == esa->bits &&
-                    (kvm_mask & (1 << i)) != 0) {
+                if ((env->features[esa->feature] & esa->bits) == esa->bits
+                    && ((ena_mask >> i) & 1) != 0) {
                     if (i < 32) {
-                        *eax |= 1 << i;
+                        *eax |= 1u << i;
                     } else {
-                        *edx |= 1 << (i - 32);
+                        *edx |= 1u << (i - 32);
                     }
                     *ecx = MAX(*ecx, esa->offset + esa->size);
                 }
             }
-            *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE);
+            *eax |= ena_mask & (XSTATE_FP | XSTATE_SSE);
             *ebx = *ecx;
         } else if (count == 1) {
             *eax = env->features[FEAT_XSAVE];
         } else if (count < ARRAY_SIZE(ext_save_areas)) {
             const ExtSaveArea *esa = &ext_save_areas[count];
-            if ((env->features[esa->feature] & esa->bits) == esa->bits &&
-                (kvm_mask & (1 << count)) != 0) {
+            if ((env->features[esa->feature] & esa->bits) == esa->bits
+                && ((ena_mask >> count) & 1) != 0) {
                 *eax = esa->size;
                 *ebx = esa->offset;
             }
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index a7da370..159e1df 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -1190,6 +1190,45 @@  void helper_fxsave(CPUX86State *env, target_ulong ptr)
     }
 }
 
+static uint64_t get_xinuse(CPUX86State *env)
+{
+    /* We don't track XINUSE.  We could calculate it here, but it's
+       probably less work to simply indicate all components in use.  */
+    return -1;
+}
+
+void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    uintptr_t ra = GETPC();
+    uint64_t old_bv, new_bv;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, ra);
+    }
+
+    /* The operand must be 64 byte aligned.  */
+    if (ptr & 63) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* Never save anything not enabled by XCR0.  */
+    rfbm &= env->xcr0;
+
+    if (rfbm & XSTATE_FP) {
+        do_xsave_fpu(env, ptr, ra);
+    }
+    if (rfbm & XSTATE_SSE) {
+        do_xsave_mxcsr(env, ptr, ra);
+        do_xsave_sse(env, ptr, ra);
+    }
+
+    /* Update the XSTATE_BV field.  */
+    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+    new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm);
+    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
+}
+
 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
 {
     int i, fpus, fptag;
@@ -1259,6 +1298,112 @@  void helper_fxrstor(CPUX86State *env, target_ulong ptr)
     }
 }
 
+void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    uintptr_t ra = GETPC();
+    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
+
+    rfbm &= env->xcr0;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, ra);
+    }
+
+    /* The operand must be 64 byte aligned.  */
+    if (ptr & 63) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+
+    if ((int64_t)xstate_bv < 0) {
+        /* FIXME: Compact form.  */
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* Standard form.  */
+
+    /* The XSTATE field must not set bits not present in XCR0.  */
+    if (xstate_bv & ~env->xcr0) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* The XCOMP field must be zero.  */
+    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
+    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
+    if (xcomp_bv0 || xcomp_bv1) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    if (rfbm & XSTATE_FP) {
+        if (xstate_bv & XSTATE_FP) {
+            do_xrstor_fpu(env, ptr, ra);
+        } else {
+            helper_fninit(env);
+            memset(env->fpregs, 0, sizeof(env->fpregs));
+        }
+    }
+    if (rfbm & XSTATE_SSE) {
+        /* Note that the standard form of XRSTOR loads MXCSR from memory
+           whether or not the XSTATE_BV bit is set.  */
+        do_xrstor_mxcsr(env, ptr, ra);
+        if (xstate_bv & XSTATE_SSE) {
+            do_xrstor_sse(env, ptr, ra);
+        } else {
+            /* ??? When AVX is implemented, we may have to be more
+               selective in the clearing.  */
+            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
+        }
+    }
+}
+
+uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
+{
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+    }
+
+    switch (ecx) {
+    case 0:
+        return env->xcr0;
+    case 1:
+        /* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2].  */
+        return env->xcr0 & get_xinuse(env);
+    }
+    raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
+void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
+{
+    uint32_t dummy, ena_lo, ena_hi;
+    uint64_t ena;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+    }
+
+    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
+    if (ecx != 0 || (mask & XSTATE_FP) == 0) {
+        goto do_gpf;
+    }
+
+    /* Disallow enabling unimplemented features.  */
+    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
+    ena = ((uint64_t)ena_hi << 32) | ena_lo;
+    if (mask & ~ena) {
+        goto do_gpf;
+    }
+
+    env->xcr0 = mask;
+    return;
+
+ do_gpf:
+    raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
 {
     CPU_LDoubleU temp;
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 81568c8..ac47b14 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -647,6 +647,7 @@  void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
 {
     X86CPU *cpu = x86_env_get_cpu(env);
+    uint32_t hflags;
 
 #if defined(DEBUG_MMU)
     printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@@ -656,24 +657,27 @@  void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
          CR4_SMEP_MASK | CR4_SMAP_MASK)) {
         tlb_flush(CPU(cpu), 1);
     }
+
+    /* Clear bits we're going to recompute.  */
+    hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK);
+
     /* SSE handling */
     if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
         new_cr4 &= ~CR4_OSFXSR_MASK;
     }
-    env->hflags &= ~HF_OSFXSR_MASK;
     if (new_cr4 & CR4_OSFXSR_MASK) {
-        env->hflags |= HF_OSFXSR_MASK;
+        hflags |= HF_OSFXSR_MASK;
     }
 
     if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
         new_cr4 &= ~CR4_SMAP_MASK;
     }
-    env->hflags &= ~HF_SMAP_MASK;
     if (new_cr4 & CR4_SMAP_MASK) {
-        env->hflags |= HF_SMAP_MASK;
+        hflags |= HF_SMAP_MASK;
     }
 
     env->cr[4] = new_cr4;
+    env->hflags = hflags;
 }
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 6109e46..9dfc735 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -187,6 +187,10 @@  DEF_HELPER_3(fsave, void, env, tl, int)
 DEF_HELPER_3(frstor, void, env, tl, int)
 DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
 DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
+DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
 
 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 94024bc..fca0314 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1855,13 +1855,16 @@  static int kvm_get_sregs(X86CPU *cpu)
        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
 
-    hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags = env->hflags & HFLAG_COPY_MASK;
+    hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
     hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
     hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
     hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
-    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
-                (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        hflags |= HF_OSFXSR_MASK;
+    }
 
     if (env->efer & MSR_EFER_LMA) {
         hflags |= HF_LMA_MASK;
@@ -1882,7 +1885,7 @@  static int kvm_get_sregs(X86CPU *cpu)
                         env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
         }
     }
-    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    env->hflags = hflags;
 
     return 0;
 }
diff --git a/target-i386/translate.c b/target-i386/translate.c
index b5b85fd..f5233aa 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7075,6 +7075,36 @@  static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
             break;
 
+        case 0xd0: /* xgetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            break;
+
+        case 0xd1: /* xsetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            /* End TB because translation flags may change.  */
+            gen_jmp_im(s->pc - pc_start);
+            gen_eob(s);
+            break;
+
         case 0xd8: /* VMRUN */
             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                 goto illegal_op;
@@ -7576,6 +7606,30 @@  static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
             break;
 
+        CASE_MEM_OP(4): /* xsave */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+            break;
+
+        CASE_MEM_OP(5): /* xrstor */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+            break;
+
         CASE_MEM_OP(6): /* clwb */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;