Message ID | 1414033363-31032-1-git-send-email-chao.p.peng@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 10/23/2014 05:02 AM, Chao Peng wrote: > Add AVX512 feature bits, register definition and corresponding > xsave/vmstate support. > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > --- > target-i386/cpu.c | 10 ++++-- > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++ > target-i386/kvm.c | 19 +++++++++++ > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 175 insertions(+), 2 deletions(-) Looks good---Eduardo, Juan, can you look at it too? Documentation is at https://software.intel.com/sites/default/files/managed/68/8b/319433-019.pdf See Tables 2-5, 3-6, 3-7, 3-8. Paolo > diff --git a/target-i386/cpu.c b/target-i386/cpu.c > index e7bf9de..e91bfbd 100644 > --- a/target-i386/cpu.c > +++ b/target-i386/cpu.c > @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = { > static const char *cpuid_7_0_ebx_feature_name[] = { > "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", > "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, > - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, > + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, > }; > > static const char *cpuid_apm_edx_feature_name[] = { > @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = { > .offset = 0x3c0, .size = 0x40 }, > [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, > .offset = 0x400, .size = 0x40 }, > + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x440, .size = 0x40 }, > + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x480, .size = 0x200 }, > + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x680, .size = 0x400 }, > }; > > const char *get_register_name_32(unsigned int reg) > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > index 2968749..9f01831 100644 > 
--- a/target-i386/cpu.h > +++ b/target-i386/cpu.h > @@ -395,6 +395,9 @@ > #define XSTATE_YMM (1ULL << 2) > #define XSTATE_BNDREGS (1ULL << 3) > #define XSTATE_BNDCSR (1ULL << 4) > +#define XSTATE_OPMASK (1ULL << 5) > +#define XSTATE_ZMM_Hi256 (1ULL << 6) > +#define XSTATE_Hi16_ZMM (1ULL << 7) > > > /* CPUID feature words */ > @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; > #define CPUID_7_0_EBX_INVPCID (1U << 10) > #define CPUID_7_0_EBX_RTM (1U << 11) > #define CPUID_7_0_EBX_MPX (1U << 14) > +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ > #define CPUID_7_0_EBX_RDSEED (1U << 18) > #define CPUID_7_0_EBX_ADX (1U << 19) > #define CPUID_7_0_EBX_SMAP (1U << 20) > +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ > +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ > +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ > > /* CPUID[0x80000007].EDX flags: */ > #define CPUID_APM_INVTSC (1U << 8) > @@ -707,6 +714,24 @@ typedef union { > } XMMReg; > > typedef union { > + uint8_t _b[32]; > + uint16_t _w[16]; > + uint32_t _l[8]; > + uint64_t _q[4]; > + float32 _s[8]; > + float64 _d[4]; > +} YMMReg; > + > +typedef union { > + uint8_t _b[64]; > + uint16_t _w[32]; > + uint32_t _l[16]; > + uint64_t _q[8]; > + float32 _s[16]; > + float64 _d[8]; > +} ZMMReg; > + > +typedef union { > uint8_t _b[8]; > uint16_t _w[4]; > uint32_t _l[2]; > @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > } BNDCSReg; > > #ifdef HOST_WORDS_BIGENDIAN > +#define ZMM_B(n) _b[63 - (n)] > +#define ZMM_W(n) _w[31 - (n)] > +#define ZMM_L(n) _l[15 - (n)] > +#define ZMM_S(n) _s[15 - (n)] > +#define ZMM_Q(n) _q[7 - (n)] > +#define ZMM_D(n) _d[7 - (n)] > + > +#define YMM_B(n) _b[31 - (n)] > +#define YMM_W(n) _w[15 - (n)] > +#define YMM_L(n) _l[7 - (n)] > +#define YMM_S(n) _s[7 - (n)] > +#define YMM_Q(n) _q[3 - (n)] > +#define YMM_D(n) _d[3 - (n)] > + > #define XMM_B(n) _b[15 - (n)] > #define XMM_W(n) 
_w[7 - (n)] > #define XMM_L(n) _l[3 - (n)] > @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > #define MMX_L(n) _l[1 - (n)] > #define MMX_S(n) _s[1 - (n)] > #else > +#define ZMM_B(n) _b[n] > +#define ZMM_W(n) _w[n] > +#define ZMM_L(n) _l[n] > +#define ZMM_S(n) _s[n] > +#define ZMM_Q(n) _q[n] > +#define ZMM_D(n) _d[n] > + > +#define YMM_B(n) _b[n] > +#define YMM_W(n) _w[n] > +#define YMM_L(n) _l[n] > +#define YMM_S(n) _s[n] > +#define YMM_Q(n) _q[n] > +#define YMM_D(n) _d[n] > + > #define XMM_B(n) _b[n] > #define XMM_W(n) _w[n] > #define XMM_L(n) _l[n] > @@ -775,6 +828,8 @@ typedef struct { > > #define NB_MMU_MODES 3 > > +#define NB_OPMASK_REGS 8 > + > typedef enum TPRAccess { > TPR_ACCESS_READ, > TPR_ACCESS_WRITE, > @@ -839,6 +894,12 @@ typedef struct CPUX86State { > > XMMReg ymmh_regs[CPU_NB_REGS]; > > + uint64_t opmask_regs[NB_OPMASK_REGS]; > + YMMReg zmmh_regs[CPU_NB_REGS]; > +#ifdef TARGET_X86_64 > + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; > +#endif > + > /* sysenter registers */ > uint32_t sysenter_cs; > target_ulong sysenter_esp; > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index ddedc73..ccf36e8 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu) > #define XSAVE_YMMH_SPACE 144 > #define XSAVE_BNDREGS 240 > #define XSAVE_BNDCSR 256 > +#define XSAVE_OPMASK 272 > +#define XSAVE_ZMM_Hi256 288 > +#define XSAVE_Hi16_ZMM 416 > > static int kvm_put_xsave(X86CPU *cpu) > { > @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu) > sizeof env->bnd_regs); > memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, > sizeof(env->bndcs_regs)); > + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs, > + sizeof env->opmask_regs); > + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs, > + sizeof env->zmmh_regs); > +#ifdef TARGET_X86_64 > + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs, > + sizeof env->hi16_zmm_regs); > +#endif > r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); > 
return r; > } > @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu) > sizeof env->bnd_regs); > memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], > sizeof(env->bndcs_regs)); > + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK], > + sizeof env->opmask_regs); > + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256], > + sizeof env->zmmh_regs); > +#ifdef TARGET_X86_64 > + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM], > + sizeof env->hi16_zmm_regs); > +#endif > return 0; > } > > diff --git a/target-i386/machine.c b/target-i386/machine.c > index 0dd49f0..708fc54 100644 > --- a/target-i386/machine.c > +++ b/target-i386/machine.c > @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = { > #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ > VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) > > +static const VMStateDescription vmstate_zmmh_reg = { > + .name = "zmmh_reg", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(YMM_Q(0), YMMReg), > + VMSTATE_UINT64(YMM_Q(1), YMMReg), > + VMSTATE_UINT64(YMM_Q(2), YMMReg), > + VMSTATE_UINT64(YMM_Q(3), YMMReg), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \ > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg) > + > +#ifdef TARGET_X86_64 > +static const VMStateDescription vmstate_hi16_zmm_reg = { > + .name = "hi16_zmm_reg", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \ > + VMSTATE_STRUCT_ARRAY(_field, 
_state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg) > +#endif > + > static const VMStateDescription vmstate_bnd_regs = { > .name = "bnd_regs", > .version_id = 1, > @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = { > } > }; > > +static bool avx512_needed(void *opaque) > +{ > + X86CPU *cpu = opaque; > + CPUX86State *env = &cpu->env; > + unsigned int i; > + > + for (i = 0; i < NB_OPMASK_REGS; i++) { > + if (env->opmask_regs[i]) { > + return true; > + } > + } > + > + for (i = 0; i < CPU_NB_REGS; i++) { > +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field)) > + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) || > + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) { > + return true; > + } > +#ifdef TARGET_X86_64 > +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field)) > + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) || > + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) || > + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) || > + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) { > + return true; > + } > +#endif > + } > + > + return false; > +} > + > +static const VMStateDescription vmstate_avx512 = { > + .name = "cpu/avx512", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), > + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS), > +#ifdef TARGET_X86_64 > + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS), > +#endif > + VMSTATE_END_OF_LIST() > + } > +}; > + > VMStateDescription vmstate_x86_cpu = { > .name = "cpu", > .version_id = 12, > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { > }, { > .vmsd = &vmstate_msr_hyperv_time, > .needed = hyperv_time_enable_needed, > + }, { > + .vmsd = &vmstate_avx512, > + .needed = avx512_needed, > } , { > /* empty */ > } > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: [...] > @@ -707,6 +714,24 @@ typedef union { > } XMMReg; > > typedef union { > + uint8_t _b[32]; > + uint16_t _w[16]; > + uint32_t _l[8]; > + uint64_t _q[4]; > + float32 _s[8]; > + float64 _d[4]; > +} YMMReg; > + > +typedef union { > + uint8_t _b[64]; > + uint16_t _w[32]; > + uint32_t _l[16]; > + uint64_t _q[8]; > + float32 _s[16]; > + float64 _d[8]; > +} ZMMReg; > + > +typedef union { > uint8_t _b[8]; > uint16_t _w[4]; > uint32_t _l[2]; > @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > } BNDCSReg; > > #ifdef HOST_WORDS_BIGENDIAN > +#define ZMM_B(n) _b[63 - (n)] > +#define ZMM_W(n) _w[31 - (n)] > +#define ZMM_L(n) _l[15 - (n)] > +#define ZMM_S(n) _s[15 - (n)] > +#define ZMM_Q(n) _q[7 - (n)] > +#define ZMM_D(n) _d[7 - (n)] > + > +#define YMM_B(n) _b[31 - (n)] > +#define YMM_W(n) _w[15 - (n)] > +#define YMM_L(n) _l[7 - (n)] > +#define YMM_S(n) _s[7 - (n)] > +#define YMM_Q(n) _q[3 - (n)] > +#define YMM_D(n) _d[3 - (n)] > + > #define XMM_B(n) _b[15 - (n)] > #define XMM_W(n) _w[7 - (n)] > #define XMM_L(n) _l[3 - (n)] > @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > #define MMX_L(n) _l[1 - (n)] > #define MMX_S(n) _s[1 - (n)] > #else > +#define ZMM_B(n) _b[n] > +#define ZMM_W(n) _w[n] > +#define ZMM_L(n) _l[n] > +#define ZMM_S(n) _s[n] > +#define ZMM_Q(n) _q[n] > +#define ZMM_D(n) _d[n] > + > +#define YMM_B(n) _b[n] > +#define YMM_W(n) _w[n] > +#define YMM_L(n) _l[n] > +#define YMM_S(n) _s[n] > +#define YMM_Q(n) _q[n] > +#define YMM_D(n) _d[n] > + I am probably not being able to see some future use case of those data structures, but: why all the extra complexity here, if only ZMM_Q and YMM_Q are being used in the code, and the only place affected by the ordering of YMMReg and ZMMReg array elements are the memcpy() calls on kvm_{put,get}_xsave(), where the data always have the same layout?
On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote: > On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > [...] > > @@ -707,6 +714,24 @@ typedef union { > > } XMMReg; > > > > typedef union { > > + uint8_t _b[32]; > > + uint16_t _w[16]; > > + uint32_t _l[8]; > > + uint64_t _q[4]; > > + float32 _s[8]; > > + float64 _d[4]; > > +} YMMReg; > > + > > +typedef union { > > + uint8_t _b[64]; > > + uint16_t _w[32]; > > + uint32_t _l[16]; > > + uint64_t _q[8]; > > + float32 _s[16]; > > + float64 _d[8]; > > +} ZMMReg; > > + > > +typedef union { > > uint8_t _b[8]; > > uint16_t _w[4]; > > uint32_t _l[2]; > > @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > > } BNDCSReg; > > > > #ifdef HOST_WORDS_BIGENDIAN > > +#define ZMM_B(n) _b[63 - (n)] > > +#define ZMM_W(n) _w[31 - (n)] > > +#define ZMM_L(n) _l[15 - (n)] > > +#define ZMM_S(n) _s[15 - (n)] > > +#define ZMM_Q(n) _q[7 - (n)] > > +#define ZMM_D(n) _d[7 - (n)] > > + > > +#define YMM_B(n) _b[31 - (n)] > > +#define YMM_W(n) _w[15 - (n)] > > +#define YMM_L(n) _l[7 - (n)] > > +#define YMM_S(n) _s[7 - (n)] > > +#define YMM_Q(n) _q[3 - (n)] > > +#define YMM_D(n) _d[3 - (n)] > > + > > #define XMM_B(n) _b[15 - (n)] > > #define XMM_W(n) _w[7 - (n)] > > #define XMM_L(n) _l[3 - (n)] > > @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > > #define MMX_L(n) _l[1 - (n)] > > #define MMX_S(n) _s[1 - (n)] > > #else > > +#define ZMM_B(n) _b[n] > > +#define ZMM_W(n) _w[n] > > +#define ZMM_L(n) _l[n] > > +#define ZMM_S(n) _s[n] > > +#define ZMM_Q(n) _q[n] > > +#define ZMM_D(n) _d[n] > > + > > +#define YMM_B(n) _b[n] > > +#define YMM_W(n) _w[n] > > +#define YMM_L(n) _l[n] > > +#define YMM_S(n) _s[n] > > +#define YMM_Q(n) _q[n] > > +#define YMM_D(n) _d[n] > > + > > I am probably not being able to see some future use case of those data > structures, but: why all the extra complexity here, if only ZMM_Q and > YMM_Q are being used in the code, and the only place affected by the > ordering of YMMReg and ZMMReg array elements are the 
memcpy() calls on > kvm_{put,get}_xsave(), where the data always have the same layout? > Thanks Eduardo, then I feel comfortable to drop most of these macros and only keep YMM_Q/ZMM_Q left. As no actual benefit for ordering, then I will also make these two endianness-insensitive. Chao -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/24/2014 03:27 AM, Chao Peng wrote: > On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote: >> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: >> [...] >>> @@ -707,6 +714,24 @@ typedef union { >>> } XMMReg; >>> >>> typedef union { >>> + uint8_t _b[32]; >>> + uint16_t _w[16]; >>> + uint32_t _l[8]; >>> + uint64_t _q[4]; >>> + float32 _s[8]; >>> + float64 _d[4]; >>> +} YMMReg; >>> + >>> +typedef union { >>> + uint8_t _b[64]; >>> + uint16_t _w[32]; >>> + uint32_t _l[16]; >>> + uint64_t _q[8]; >>> + float32 _s[16]; >>> + float64 _d[8]; >>> +} ZMMReg; >>> + >>> +typedef union { >>> uint8_t _b[8]; >>> uint16_t _w[4]; >>> uint32_t _l[2]; >>> @@ -725,6 +750,20 @@ typedef struct BNDCSReg { >>> } BNDCSReg; >>> >>> #ifdef HOST_WORDS_BIGENDIAN >>> +#define ZMM_B(n) _b[63 - (n)] >>> +#define ZMM_W(n) _w[31 - (n)] >>> +#define ZMM_L(n) _l[15 - (n)] >>> +#define ZMM_S(n) _s[15 - (n)] >>> +#define ZMM_Q(n) _q[7 - (n)] >>> +#define ZMM_D(n) _d[7 - (n)] >>> + >>> +#define YMM_B(n) _b[31 - (n)] >>> +#define YMM_W(n) _w[15 - (n)] >>> +#define YMM_L(n) _l[7 - (n)] >>> +#define YMM_S(n) _s[7 - (n)] >>> +#define YMM_Q(n) _q[3 - (n)] >>> +#define YMM_D(n) _d[3 - (n)] >>> + >>> #define XMM_B(n) _b[15 - (n)] >>> #define XMM_W(n) _w[7 - (n)] >>> #define XMM_L(n) _l[3 - (n)] >>> @@ -737,6 +776,20 @@ typedef struct BNDCSReg { >>> #define MMX_L(n) _l[1 - (n)] >>> #define MMX_S(n) _s[1 - (n)] >>> #else >>> +#define ZMM_B(n) _b[n] >>> +#define ZMM_W(n) _w[n] >>> +#define ZMM_L(n) _l[n] >>> +#define ZMM_S(n) _s[n] >>> +#define ZMM_Q(n) _q[n] >>> +#define ZMM_D(n) _d[n] >>> + >>> +#define YMM_B(n) _b[n] >>> +#define YMM_W(n) _w[n] >>> +#define YMM_L(n) _l[n] >>> +#define YMM_S(n) _s[n] >>> +#define YMM_Q(n) _q[n] >>> +#define YMM_D(n) _d[n] >>> + >> >> I am probably not being able to see some future use case of those data >> structures, but: why all the extra complexity here, if only ZMM_Q and >> YMM_Q are being used in the code, and the only place affected by the >> 
ordering of YMMReg and ZMMReg array elements are the memcpy() calls on >> kvm_{put,get}_xsave(), where the data always have the same layout? >> > > Thanks Eduardo, then I feel comfortable to drop most of these macros and > only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I > will also make these two endiness-insensitive. I think we can keep the macros. The actual cleanup would be to have a single member for the 32 512-bit ZMM registers, instead of splitting xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_* registers. However, we could not use simple memcpy()s to marshal in and out of the XSAVE data. We can do it in 2.2. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote: > > > On 10/24/2014 03:27 AM, Chao Peng wrote: > > On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote: > >> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > >> [...] > >>> @@ -707,6 +714,24 @@ typedef union { > >>> } XMMReg; > >>> > >>> typedef union { > >>> + uint8_t _b[32]; > >>> + uint16_t _w[16]; > >>> + uint32_t _l[8]; > >>> + uint64_t _q[4]; > >>> + float32 _s[8]; > >>> + float64 _d[4]; > >>> +} YMMReg; > >>> + > >>> +typedef union { > >>> + uint8_t _b[64]; > >>> + uint16_t _w[32]; > >>> + uint32_t _l[16]; > >>> + uint64_t _q[8]; > >>> + float32 _s[16]; > >>> + float64 _d[8]; > >>> +} ZMMReg; > >>> + > >>> +typedef union { > >>> uint8_t _b[8]; > >>> uint16_t _w[4]; > >>> uint32_t _l[2]; > >>> @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > >>> } BNDCSReg; > >>> > >>> #ifdef HOST_WORDS_BIGENDIAN > >>> +#define ZMM_B(n) _b[63 - (n)] > >>> +#define ZMM_W(n) _w[31 - (n)] > >>> +#define ZMM_L(n) _l[15 - (n)] > >>> +#define ZMM_S(n) _s[15 - (n)] > >>> +#define ZMM_Q(n) _q[7 - (n)] > >>> +#define ZMM_D(n) _d[7 - (n)] > >>> + > >>> +#define YMM_B(n) _b[31 - (n)] > >>> +#define YMM_W(n) _w[15 - (n)] > >>> +#define YMM_L(n) _l[7 - (n)] > >>> +#define YMM_S(n) _s[7 - (n)] > >>> +#define YMM_Q(n) _q[3 - (n)] > >>> +#define YMM_D(n) _d[3 - (n)] > >>> + > >>> #define XMM_B(n) _b[15 - (n)] > >>> #define XMM_W(n) _w[7 - (n)] > >>> #define XMM_L(n) _l[3 - (n)] > >>> @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > >>> #define MMX_L(n) _l[1 - (n)] > >>> #define MMX_S(n) _s[1 - (n)] > >>> #else > >>> +#define ZMM_B(n) _b[n] > >>> +#define ZMM_W(n) _w[n] > >>> +#define ZMM_L(n) _l[n] > >>> +#define ZMM_S(n) _s[n] > >>> +#define ZMM_Q(n) _q[n] > >>> +#define ZMM_D(n) _d[n] > >>> + > >>> +#define YMM_B(n) _b[n] > >>> +#define YMM_W(n) _w[n] > >>> +#define YMM_L(n) _l[n] > >>> +#define YMM_S(n) _s[n] > >>> +#define YMM_Q(n) _q[n] > >>> +#define YMM_D(n) _d[n] > >>> + > >> > >> I am probably not 
being able to see some future use case of those data > >> structures, but: why all the extra complexity here, if only ZMM_Q and > >> YMM_Q are being used in the code, and the only place affected by the > >> ordering of YMMReg and ZMMReg array elements are the memcpy() calls on > >> kvm_{put,get}_xsave(), where the data always have the same layout? > >> > > > > Thanks Eduardo, then I feel comfortable to drop most of these macros and > > only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I > > will also make these two endiness-insensitive. > > I think we can keep the macros. The actual cleanup would be to have a > single member for the 32 512-bit ZMM registers, instead of splitting > xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_* > registers. However, we could not use simple memcpy()s to marshal in and > out of the XSAVE data. Agreed. I don't mind keeping those macros in this patch, as this is just following the existing conventions in the code. Whatever we do to clean that up, we can do it later. > We can do it in 2.2. You mean 2.3, or do you want to clean that up in this release?
On 10/24/2014 01:12 PM, Eduardo Habkost wrote: >> > I think we can keep the macros. The actual cleanup would be to have a >> > single member for the 32 512-bit ZMM registers, instead of splitting >> > xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_* >> > registers. However, we could not use simple memcpy()s to marshal in and >> > out of the XSAVE data. > Agreed. I don't mind keeping those macros in this patch, as this is just > following the existing conventions in the code. Whatever we do to clean > that up, we can do it later. > >> > We can do it in 2.2. > You mean 2.3, or do you want to clean that up in this release? I mean 2.3. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > Add AVX512 feature bits, register definition and corresponding > xsave/vmstate support. > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > --- [...] > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { > }, { > .vmsd = &vmstate_msr_hyperv_time, > .needed = hyperv_time_enable_needed, > + }, { > + .vmsd = &vmstate_avx512, > + .needed = avx512_needed, > } , { > /* empty */ The tab character above needs to be replaced by spaces, but the rest looks good and matches the Intel docs. Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
On Thu, Oct 23, 2014 at 04:34:46PM +0200, Paolo Bonzini wrote: > On 10/23/2014 05:02 AM, Chao Peng wrote: > > Add AVX512 feature bits, register definition and corresponding > > xsave/vmstate support. > > > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > > --- > > target-i386/cpu.c | 10 ++++-- > > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++ > > target-i386/kvm.c | 19 +++++++++++ > > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 175 insertions(+), 2 deletions(-) > > Looks good---Eduardo, Juan, can you look at it too? Documentation is at > > https://software.intel.com/sites/default/files/managed/68/8b/319433-019.pdf > > See Tables 2-5, 3-6, 3-7, 3-8. Also, if anybody is confused by the TARGET_X86_64 #ifdefs, see section 1.2.2 32 SIMD Register Support ("The number of available vector registers in 32-bit mode is still 8").
On Fri, Oct 24, 2014 at 02:01:44PM -0200, Eduardo Habkost wrote: > On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > > Add AVX512 feature bits, register definition and corresponding > > xsave/vmstate support. > > > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > > --- > [...] I agree to the cleanup scheme Paolo suggested. And thanks Eduardo for going through the spec and double checking for this. > > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { > > }, { > > .vmsd = &vmstate_msr_hyperv_time, > > .needed = hyperv_time_enable_needed, > > + }, { > > + .vmsd = &vmstate_avx512, > > + .needed = avx512_needed, > > } , { > > /* empty */ > > The tab character above needs to be replaced by spaces, but the rest > looks good and matches the Intel docs. > > Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> If this is the only issue, Paolo, could you help on this when you commit? Thank you. Chao -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote: [...] > I think we can keep the macros. The actual cleanup would be to have a > single member for the 32 512-bit ZMM registers, instead of splitting > xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_* > registers. However, we could not use simple memcpy()s to marshal in and > out of the XSAVE data. We can do it in 2.2. Now, about the actual 2.3 plans: I believe the xmm/ymmh/zmmh/zmm_hi16 separation actually makes the code simpler, because the only code touching those registers inside QEMU is xsave/vmstate state loading/saving, where the bits of the ZMM registers are split into different sections/fields exactly in the same way. We can still eliminate the YMM_* and ZMM_* macros while keeping xmm/ymmh/zmmh/zmm_hi16 as separate fields, and having a single zmm_regs member would just add unnecessary complexity to the XSAVE<->CPUX86State<->VMState translation code. (But if one day TCG starts implementing AVX or AVX512, then it may be worthwhile to move the ZMM registers into a single field.)
On 10/27/2014 04:48 PM, Eduardo Habkost wrote: > On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote: > [...] >> I think we can keep the macros. The actual cleanup would be to have a >> single member for the 32 512-bit ZMM registers, instead of splitting >> xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_* >> registers. However, we could not use simple memcpy()s to marshal in and >> out of the XSAVE data. We can do it in 2.2. > > Now, about the actual 2.3 plans: I believe the xmm/ymmh/zmmh/zmm_hi16 > separation actually makes the code simpler, because the only code > touching those registers inside QEMU is xsave/vmstate state > loading/saving, where the bits of the ZMM registers are split into > different sections/fields exactly in the same way. > > We can still eliminate the YMM_* and ZMM_* macros while keeping > xmm/ymmh/zmmh/zmm_hi16 as separate fields, and having a single zmm_regs > member would just add unnecessary complexity to the > XSAVE<->CPUX86State<->VMState translation code. > > (But if one day TCG starts implementing AVX or AVX512, then it may be > worthwhile to move the ZMM registers into a single field.) Yes, exactly---or even sooner, if we start dumping the 256 or 512 byte registers. I think the current separation of xmm/ymmh/zmmh/zmm_hi16 is a KVM-ism. Let's see what the patches look like, once they are ready. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > Add AVX512 feature bits, register definition and corresponding > xsave/vmstate support. > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> Thanks! As this was first posted after soft freeze, please resubmit after 2.2 is out. See schedule http://wiki.qemu.org/Planning/2.2 > --- > target-i386/cpu.c | 10 ++++-- > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++ > target-i386/kvm.c | 19 +++++++++++ > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 175 insertions(+), 2 deletions(-) > > diff --git a/target-i386/cpu.c b/target-i386/cpu.c > index e7bf9de..e91bfbd 100644 > --- a/target-i386/cpu.c > +++ b/target-i386/cpu.c > @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = { > static const char *cpuid_7_0_ebx_feature_name[] = { > "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", > "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, > - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, > + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, > }; > > static const char *cpuid_apm_edx_feature_name[] = { > @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = { > .offset = 0x3c0, .size = 0x40 }, > [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, > .offset = 0x400, .size = 0x40 }, > + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x440, .size = 0x40 }, > + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x480, .size = 0x200 }, > + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > + .offset = 0x680, .size = 0x400 }, > }; > > const char *get_register_name_32(unsigned int reg) > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > index 2968749..9f01831 100644 > --- a/target-i386/cpu.h > +++ 
b/target-i386/cpu.h > @@ -395,6 +395,9 @@ > #define XSTATE_YMM (1ULL << 2) > #define XSTATE_BNDREGS (1ULL << 3) > #define XSTATE_BNDCSR (1ULL << 4) > +#define XSTATE_OPMASK (1ULL << 5) > +#define XSTATE_ZMM_Hi256 (1ULL << 6) > +#define XSTATE_Hi16_ZMM (1ULL << 7) > > > /* CPUID feature words */ > @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; > #define CPUID_7_0_EBX_INVPCID (1U << 10) > #define CPUID_7_0_EBX_RTM (1U << 11) > #define CPUID_7_0_EBX_MPX (1U << 14) > +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ > #define CPUID_7_0_EBX_RDSEED (1U << 18) > #define CPUID_7_0_EBX_ADX (1U << 19) > #define CPUID_7_0_EBX_SMAP (1U << 20) > +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ > +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ > +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ > > /* CPUID[0x80000007].EDX flags: */ > #define CPUID_APM_INVTSC (1U << 8) > @@ -707,6 +714,24 @@ typedef union { > } XMMReg; > > typedef union { > + uint8_t _b[32]; > + uint16_t _w[16]; > + uint32_t _l[8]; > + uint64_t _q[4]; > + float32 _s[8]; > + float64 _d[4]; > +} YMMReg; > + > +typedef union { > + uint8_t _b[64]; > + uint16_t _w[32]; > + uint32_t _l[16]; > + uint64_t _q[8]; > + float32 _s[16]; > + float64 _d[8]; > +} ZMMReg; > + > +typedef union { > uint8_t _b[8]; > uint16_t _w[4]; > uint32_t _l[2]; > @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > } BNDCSReg; > > #ifdef HOST_WORDS_BIGENDIAN > +#define ZMM_B(n) _b[63 - (n)] > +#define ZMM_W(n) _w[31 - (n)] > +#define ZMM_L(n) _l[15 - (n)] > +#define ZMM_S(n) _s[15 - (n)] > +#define ZMM_Q(n) _q[7 - (n)] > +#define ZMM_D(n) _d[7 - (n)] > + > +#define YMM_B(n) _b[31 - (n)] > +#define YMM_W(n) _w[15 - (n)] > +#define YMM_L(n) _l[7 - (n)] > +#define YMM_S(n) _s[7 - (n)] > +#define YMM_Q(n) _q[3 - (n)] > +#define YMM_D(n) _d[3 - (n)] > + > #define XMM_B(n) _b[15 - (n)] > #define XMM_W(n) _w[7 - (n)] > #define XMM_L(n) 
_l[3 - (n)] > @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > #define MMX_L(n) _l[1 - (n)] > #define MMX_S(n) _s[1 - (n)] > #else > +#define ZMM_B(n) _b[n] > +#define ZMM_W(n) _w[n] > +#define ZMM_L(n) _l[n] > +#define ZMM_S(n) _s[n] > +#define ZMM_Q(n) _q[n] > +#define ZMM_D(n) _d[n] > + > +#define YMM_B(n) _b[n] > +#define YMM_W(n) _w[n] > +#define YMM_L(n) _l[n] > +#define YMM_S(n) _s[n] > +#define YMM_Q(n) _q[n] > +#define YMM_D(n) _d[n] > + > #define XMM_B(n) _b[n] > #define XMM_W(n) _w[n] > #define XMM_L(n) _l[n] > @@ -775,6 +828,8 @@ typedef struct { > > #define NB_MMU_MODES 3 > > +#define NB_OPMASK_REGS 8 > + > typedef enum TPRAccess { > TPR_ACCESS_READ, > TPR_ACCESS_WRITE, > @@ -839,6 +894,12 @@ typedef struct CPUX86State { > > XMMReg ymmh_regs[CPU_NB_REGS]; > > + uint64_t opmask_regs[NB_OPMASK_REGS]; > + YMMReg zmmh_regs[CPU_NB_REGS]; > +#ifdef TARGET_X86_64 > + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; > +#endif > + > /* sysenter registers */ > uint32_t sysenter_cs; > target_ulong sysenter_esp; > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index ddedc73..ccf36e8 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu) > #define XSAVE_YMMH_SPACE 144 > #define XSAVE_BNDREGS 240 > #define XSAVE_BNDCSR 256 > +#define XSAVE_OPMASK 272 > +#define XSAVE_ZMM_Hi256 288 > +#define XSAVE_Hi16_ZMM 416 > > static int kvm_put_xsave(X86CPU *cpu) > { > @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu) > sizeof env->bnd_regs); > memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, > sizeof(env->bndcs_regs)); > + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs, > + sizeof env->opmask_regs); > + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs, > + sizeof env->zmmh_regs); > +#ifdef TARGET_X86_64 > + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs, > + sizeof env->hi16_zmm_regs); > +#endif > r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); > return r; > } > @@ -1402,6 
+1413,14 @@ static int kvm_get_xsave(X86CPU *cpu) > sizeof env->bnd_regs); > memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], > sizeof(env->bndcs_regs)); > + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK], > + sizeof env->opmask_regs); > + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256], > + sizeof env->zmmh_regs); > +#ifdef TARGET_X86_64 > + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM], > + sizeof env->hi16_zmm_regs); > +#endif > return 0; > } > > diff --git a/target-i386/machine.c b/target-i386/machine.c > index 0dd49f0..708fc54 100644 > --- a/target-i386/machine.c > +++ b/target-i386/machine.c > @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = { > #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ > VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) > > +static const VMStateDescription vmstate_zmmh_reg = { > + .name = "zmmh_reg", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(YMM_Q(0), YMMReg), > + VMSTATE_UINT64(YMM_Q(1), YMMReg), > + VMSTATE_UINT64(YMM_Q(2), YMMReg), > + VMSTATE_UINT64(YMM_Q(3), YMMReg), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \ > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg) > + > +#ifdef TARGET_X86_64 > +static const VMStateDescription vmstate_hi16_zmm_reg = { > + .name = "hi16_zmm_reg", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), > + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \ > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, 
vmstate_hi16_zmm_reg, ZMMReg) > +#endif > + > static const VMStateDescription vmstate_bnd_regs = { > .name = "bnd_regs", > .version_id = 1, > @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = { > } > }; > > +static bool avx512_needed(void *opaque) > +{ > + X86CPU *cpu = opaque; > + CPUX86State *env = &cpu->env; > + unsigned int i; > + > + for (i = 0; i < NB_OPMASK_REGS; i++) { > + if (env->opmask_regs[i]) { > + return true; > + } > + } > + > + for (i = 0; i < CPU_NB_REGS; i++) { > +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field)) > + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) || > + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) { > + return true; > + } > +#ifdef TARGET_X86_64 > +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field)) > + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) || > + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) || > + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) || > + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) { > + return true; > + } > +#endif > + } > + > + return false; > +} > + > +static const VMStateDescription vmstate_avx512 = { > + .name = "cpu/avx512", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), > + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS), > +#ifdef TARGET_X86_64 > + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS), > +#endif > + VMSTATE_END_OF_LIST() > + } > +}; > + > VMStateDescription vmstate_x86_cpu = { > .name = "cpu", > .version_id = 12, > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { > }, { > .vmsd = &vmstate_msr_hyperv_time, > .needed = hyperv_time_enable_needed, > + }, { > + .vmsd = &vmstate_avx512, > + .needed = avx512_needed, > } , { > /* empty */ > } > -- > 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
On Sun, Nov 02, 2014 at 12:19:09PM +0200, Michael S. Tsirkin wrote: > On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote: > > Add AVX512 feature bits, register definition and corresponding > > xsave/vmstate support. > > > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > > Thanks! > > As this was first posted after soft freeze, please > resubmit after 2.2 is out. > > See schedule http://wiki.qemu.org/Planning/2.2 Sorry I didn't notice this. But I think Paolo already merged this patch and it's now in the qemu main tree(9aecd6f). So what else I can do for this? Chao > > > --- > > target-i386/cpu.c | 10 ++++-- > > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++ > > target-i386/kvm.c | 19 +++++++++++ > > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 175 insertions(+), 2 deletions(-) > > > > diff --git a/target-i386/cpu.c b/target-i386/cpu.c > > index e7bf9de..e91bfbd 100644 > > --- a/target-i386/cpu.c > > +++ b/target-i386/cpu.c > > @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = { > > static const char *cpuid_7_0_ebx_feature_name[] = { > > "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", > > "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, > > - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > > - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, > > + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, > > + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, > > }; > > > > static const char *cpuid_apm_edx_feature_name[] = { > > @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = { > > .offset = 0x3c0, .size = 0x40 }, > > [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, > > .offset = 0x400, .size = 0x40 }, > > + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > > + .offset = 0x440, .size = 0x40 }, > > + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > > + .offset = 
0x480, .size = 0x200 }, > > + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, > > + .offset = 0x680, .size = 0x400 }, > > }; > > > > const char *get_register_name_32(unsigned int reg) > > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > > index 2968749..9f01831 100644 > > --- a/target-i386/cpu.h > > +++ b/target-i386/cpu.h > > @@ -395,6 +395,9 @@ > > #define XSTATE_YMM (1ULL << 2) > > #define XSTATE_BNDREGS (1ULL << 3) > > #define XSTATE_BNDCSR (1ULL << 4) > > +#define XSTATE_OPMASK (1ULL << 5) > > +#define XSTATE_ZMM_Hi256 (1ULL << 6) > > +#define XSTATE_Hi16_ZMM (1ULL << 7) > > > > > > /* CPUID feature words */ > > @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; > > #define CPUID_7_0_EBX_INVPCID (1U << 10) > > #define CPUID_7_0_EBX_RTM (1U << 11) > > #define CPUID_7_0_EBX_MPX (1U << 14) > > +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ > > #define CPUID_7_0_EBX_RDSEED (1U << 18) > > #define CPUID_7_0_EBX_ADX (1U << 19) > > #define CPUID_7_0_EBX_SMAP (1U << 20) > > +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ > > +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ > > +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ > > > > /* CPUID[0x80000007].EDX flags: */ > > #define CPUID_APM_INVTSC (1U << 8) > > @@ -707,6 +714,24 @@ typedef union { > > } XMMReg; > > > > typedef union { > > + uint8_t _b[32]; > > + uint16_t _w[16]; > > + uint32_t _l[8]; > > + uint64_t _q[4]; > > + float32 _s[8]; > > + float64 _d[4]; > > +} YMMReg; > > + > > +typedef union { > > + uint8_t _b[64]; > > + uint16_t _w[32]; > > + uint32_t _l[16]; > > + uint64_t _q[8]; > > + float32 _s[16]; > > + float64 _d[8]; > > +} ZMMReg; > > + > > +typedef union { > > uint8_t _b[8]; > > uint16_t _w[4]; > > uint32_t _l[2]; > > @@ -725,6 +750,20 @@ typedef struct BNDCSReg { > > } BNDCSReg; > > > > #ifdef HOST_WORDS_BIGENDIAN > > +#define ZMM_B(n) _b[63 - (n)] > > +#define 
ZMM_W(n) _w[31 - (n)] > > +#define ZMM_L(n) _l[15 - (n)] > > +#define ZMM_S(n) _s[15 - (n)] > > +#define ZMM_Q(n) _q[7 - (n)] > > +#define ZMM_D(n) _d[7 - (n)] > > + > > +#define YMM_B(n) _b[31 - (n)] > > +#define YMM_W(n) _w[15 - (n)] > > +#define YMM_L(n) _l[7 - (n)] > > +#define YMM_S(n) _s[7 - (n)] > > +#define YMM_Q(n) _q[3 - (n)] > > +#define YMM_D(n) _d[3 - (n)] > > + > > #define XMM_B(n) _b[15 - (n)] > > #define XMM_W(n) _w[7 - (n)] > > #define XMM_L(n) _l[3 - (n)] > > @@ -737,6 +776,20 @@ typedef struct BNDCSReg { > > #define MMX_L(n) _l[1 - (n)] > > #define MMX_S(n) _s[1 - (n)] > > #else > > +#define ZMM_B(n) _b[n] > > +#define ZMM_W(n) _w[n] > > +#define ZMM_L(n) _l[n] > > +#define ZMM_S(n) _s[n] > > +#define ZMM_Q(n) _q[n] > > +#define ZMM_D(n) _d[n] > > + > > +#define YMM_B(n) _b[n] > > +#define YMM_W(n) _w[n] > > +#define YMM_L(n) _l[n] > > +#define YMM_S(n) _s[n] > > +#define YMM_Q(n) _q[n] > > +#define YMM_D(n) _d[n] > > + > > #define XMM_B(n) _b[n] > > #define XMM_W(n) _w[n] > > #define XMM_L(n) _l[n] > > @@ -775,6 +828,8 @@ typedef struct { > > > > #define NB_MMU_MODES 3 > > > > +#define NB_OPMASK_REGS 8 > > + > > typedef enum TPRAccess { > > TPR_ACCESS_READ, > > TPR_ACCESS_WRITE, > > @@ -839,6 +894,12 @@ typedef struct CPUX86State { > > > > XMMReg ymmh_regs[CPU_NB_REGS]; > > > > + uint64_t opmask_regs[NB_OPMASK_REGS]; > > + YMMReg zmmh_regs[CPU_NB_REGS]; > > +#ifdef TARGET_X86_64 > > + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; > > +#endif > > + > > /* sysenter registers */ > > uint32_t sysenter_cs; > > target_ulong sysenter_esp; > > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > > index ddedc73..ccf36e8 100644 > > --- a/target-i386/kvm.c > > +++ b/target-i386/kvm.c > > @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu) > > #define XSAVE_YMMH_SPACE 144 > > #define XSAVE_BNDREGS 240 > > #define XSAVE_BNDCSR 256 > > +#define XSAVE_OPMASK 272 > > +#define XSAVE_ZMM_Hi256 288 > > +#define XSAVE_Hi16_ZMM 416 > > > > static int 
kvm_put_xsave(X86CPU *cpu) > > { > > @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu) > > sizeof env->bnd_regs); > > memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, > > sizeof(env->bndcs_regs)); > > + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs, > > + sizeof env->opmask_regs); > > + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs, > > + sizeof env->zmmh_regs); > > +#ifdef TARGET_X86_64 > > + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs, > > + sizeof env->hi16_zmm_regs); > > +#endif > > r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); > > return r; > > } > > @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu) > > sizeof env->bnd_regs); > > memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], > > sizeof(env->bndcs_regs)); > > + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK], > > + sizeof env->opmask_regs); > > + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256], > > + sizeof env->zmmh_regs); > > +#ifdef TARGET_X86_64 > > + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM], > > + sizeof env->hi16_zmm_regs); > > +#endif > > return 0; > > } > > > > diff --git a/target-i386/machine.c b/target-i386/machine.c > > index 0dd49f0..708fc54 100644 > > --- a/target-i386/machine.c > > +++ b/target-i386/machine.c > > @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = { > > #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ > > VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) > > > > +static const VMStateDescription vmstate_zmmh_reg = { > > + .name = "zmmh_reg", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT64(YMM_Q(0), YMMReg), > > + VMSTATE_UINT64(YMM_Q(1), YMMReg), > > + VMSTATE_UINT64(YMM_Q(2), YMMReg), > > + VMSTATE_UINT64(YMM_Q(3), YMMReg), > > + VMSTATE_END_OF_LIST() > > + } > > +}; > > + > > +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \ > > + VMSTATE_STRUCT_ARRAY(_field, _state, 
_n, 0, vmstate_zmmh_reg, YMMReg) > > + > > +#ifdef TARGET_X86_64 > > +static const VMStateDescription vmstate_hi16_zmm_reg = { > > + .name = "hi16_zmm_reg", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), > > + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), > > + VMSTATE_END_OF_LIST() > > + } > > +}; > > + > > +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \ > > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg) > > +#endif > > + > > static const VMStateDescription vmstate_bnd_regs = { > > .name = "bnd_regs", > > .version_id = 1, > > @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = { > > } > > }; > > > > +static bool avx512_needed(void *opaque) > > +{ > > + X86CPU *cpu = opaque; > > + CPUX86State *env = &cpu->env; > > + unsigned int i; > > + > > + for (i = 0; i < NB_OPMASK_REGS; i++) { > > + if (env->opmask_regs[i]) { > > + return true; > > + } > > + } > > + > > + for (i = 0; i < CPU_NB_REGS; i++) { > > +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field)) > > + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) || > > + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) { > > + return true; > > + } > > +#ifdef TARGET_X86_64 > > +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field)) > > + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) || > > + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) || > > + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) || > > + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) { > > + return true; > > + } > > +#endif > > + } > > + > > + return false; > > +} > > + > > +static const VMStateDescription vmstate_avx512 = { > > + .name = "cpu/avx512", > > + .version_id = 1, > > + .minimum_version_id = 1, 
> > + .fields = (VMStateField[]) { > > + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), > > + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS), > > +#ifdef TARGET_X86_64 > > + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS), > > +#endif > > + VMSTATE_END_OF_LIST() > > + } > > +}; > > + > > VMStateDescription vmstate_x86_cpu = { > > .name = "cpu", > > .version_id = 12, > > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { > > }, { > > .vmsd = &vmstate_msr_hyperv_time, > > .needed = hyperv_time_enable_needed, > > + }, { > > + .vmsd = &vmstate_avx512, > > + .needed = avx512_needed, > > } , { > > /* empty */ > > } > > -- > > 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/11/2014 11:19, Michael S. Tsirkin wrote: > > Add AVX512 feature bits, register definition and corresponding > > xsave/vmstate support. > > > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > > Thanks! > > As this was first posted after soft freeze, please > resubmit after 2.2 is out. > > See schedule http://wiki.qemu.org/Planning/2.2 Actually this is already in. While "Planning/2.2" says "all features should have patches posted on the mailing list", the actual page describing soft feature freeze is more lenient: By the date of the soft feature freeze, any non-trivial feature should have some code posted to the qemu-devel mailing list if it's targeting a given release. Major features, and in particular features with a high likelihood of breaking things, should already be in the process of being merged. This patch was fairly trivial, as it only adds a few memcpys and a subsection. Likelihood of breaking things is basically zero because AVX512 does not yet exist in silicon. Eduardo reviewed the patch promptly and agreed with putting it in 2.2, so that's what I did. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 03, 2014 at 12:31:42PM +0100, Paolo Bonzini wrote: > > On 02/11/2014 11:19, Michael S. Tsirkin wrote: > > > Add AVX512 feature bits, register definition and corresponding > > > xsave/vmstate support. > > > > > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> > > > > Thanks! > > > > As this was first posted after soft freeze, please > > resubmit after 2.2 is out. > > > > See schedule http://wiki.qemu.org/Planning/2.2 > > Actually this is already in. > > While "Planning/2.2" says "all features should have patches posted on > the mailing list", the actual page describing soft feature freeze is > more lenient: > > By the date of the soft feature freeze, any non-trivial feature > should have some code posted to the qemu-devel mailing list if it's > targeting a given release. Major features, and in particular > features with a high likelihood of breaking things, should already > be in the process of being merged. > > This patch was fairly trivial, as it only adds a few memcpys and a > subsection. Likelihood of breaking things is basically zero because > AVX512 does not yet exist in silicon. Eduardo reviewed the patch > promptly and agreed with putting it in 2.2, so that's what I did. > > Paolo Oh that's fine. I was not trying to argue, I didn't notice it was in and thought I'm asked to merge it. It's always a judgement call, I trust you did the right thing. Thanks and sorry about the noise.
diff --git a/target-i386/cpu.c b/target-i386/cpu.c index e7bf9de..e91bfbd 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = { static const char *cpuid_7_0_ebx_feature_name[] = { "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, }; static const char *cpuid_apm_edx_feature_name[] = { @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = { .offset = 0x3c0, .size = 0x40 }, [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, .offset = 0x400, .size = 0x40 }, + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, + .offset = 0x440, .size = 0x40 }, + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, + .offset = 0x480, .size = 0x200 }, + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, + .offset = 0x680, .size = 0x400 }, }; const char *get_register_name_32(unsigned int reg) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 2968749..9f01831 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -395,6 +395,9 @@ #define XSTATE_YMM (1ULL << 2) #define XSTATE_BNDREGS (1ULL << 3) #define XSTATE_BNDCSR (1ULL << 4) +#define XSTATE_OPMASK (1ULL << 5) +#define XSTATE_ZMM_Hi256 (1ULL << 6) +#define XSTATE_Hi16_ZMM (1ULL << 7) /* CPUID feature words */ @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_INVPCID (1U << 10) #define CPUID_7_0_EBX_RTM (1U << 11) #define CPUID_7_0_EBX_MPX (1U << 14) +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ #define CPUID_7_0_EBX_RDSEED (1U << 18) #define CPUID_7_0_EBX_ADX (1U << 19) #define CPUID_7_0_EBX_SMAP (1U << 20) +#define CPUID_7_0_EBX_AVX512PF 
(1U << 26) /* AVX-512 Prefetch */ +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ /* CPUID[0x80000007].EDX flags: */ #define CPUID_APM_INVTSC (1U << 8) @@ -707,6 +714,24 @@ typedef union { } XMMReg; typedef union { + uint8_t _b[32]; + uint16_t _w[16]; + uint32_t _l[8]; + uint64_t _q[4]; + float32 _s[8]; + float64 _d[4]; +} YMMReg; + +typedef union { + uint8_t _b[64]; + uint16_t _w[32]; + uint32_t _l[16]; + uint64_t _q[8]; + float32 _s[16]; + float64 _d[8]; +} ZMMReg; + +typedef union { uint8_t _b[8]; uint16_t _w[4]; uint32_t _l[2]; @@ -725,6 +750,20 @@ typedef struct BNDCSReg { } BNDCSReg; #ifdef HOST_WORDS_BIGENDIAN +#define ZMM_B(n) _b[63 - (n)] +#define ZMM_W(n) _w[31 - (n)] +#define ZMM_L(n) _l[15 - (n)] +#define ZMM_S(n) _s[15 - (n)] +#define ZMM_Q(n) _q[7 - (n)] +#define ZMM_D(n) _d[7 - (n)] + +#define YMM_B(n) _b[31 - (n)] +#define YMM_W(n) _w[15 - (n)] +#define YMM_L(n) _l[7 - (n)] +#define YMM_S(n) _s[7 - (n)] +#define YMM_Q(n) _q[3 - (n)] +#define YMM_D(n) _d[3 - (n)] + #define XMM_B(n) _b[15 - (n)] #define XMM_W(n) _w[7 - (n)] #define XMM_L(n) _l[3 - (n)] @@ -737,6 +776,20 @@ typedef struct BNDCSReg { #define MMX_L(n) _l[1 - (n)] #define MMX_S(n) _s[1 - (n)] #else +#define ZMM_B(n) _b[n] +#define ZMM_W(n) _w[n] +#define ZMM_L(n) _l[n] +#define ZMM_S(n) _s[n] +#define ZMM_Q(n) _q[n] +#define ZMM_D(n) _d[n] + +#define YMM_B(n) _b[n] +#define YMM_W(n) _w[n] +#define YMM_L(n) _l[n] +#define YMM_S(n) _s[n] +#define YMM_Q(n) _q[n] +#define YMM_D(n) _d[n] + #define XMM_B(n) _b[n] #define XMM_W(n) _w[n] #define XMM_L(n) _l[n] @@ -775,6 +828,8 @@ typedef struct { #define NB_MMU_MODES 3 +#define NB_OPMASK_REGS 8 + typedef enum TPRAccess { TPR_ACCESS_READ, TPR_ACCESS_WRITE, @@ -839,6 +894,12 @@ typedef struct CPUX86State { XMMReg ymmh_regs[CPU_NB_REGS]; + uint64_t opmask_regs[NB_OPMASK_REGS]; + YMMReg zmmh_regs[CPU_NB_REGS]; +#ifdef TARGET_X86_64 + ZMMReg 
hi16_zmm_regs[CPU_NB_REGS]; +#endif + /* sysenter registers */ uint32_t sysenter_cs; target_ulong sysenter_esp; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ddedc73..ccf36e8 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_YMMH_SPACE 144 #define XSAVE_BNDREGS 240 #define XSAVE_BNDCSR 256 +#define XSAVE_OPMASK 272 +#define XSAVE_ZMM_Hi256 288 +#define XSAVE_Hi16_ZMM 416 static int kvm_put_xsave(X86CPU *cpu) { @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu) sizeof env->bnd_regs); memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, sizeof(env->bndcs_regs)); + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs, + sizeof env->opmask_regs); + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs, + sizeof env->zmmh_regs); +#ifdef TARGET_X86_64 + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs, + sizeof env->hi16_zmm_regs); +#endif r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu) sizeof env->bnd_regs); memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], sizeof(env->bndcs_regs)); + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK], + sizeof env->opmask_regs); + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256], + sizeof env->zmmh_regs); +#ifdef TARGET_X86_64 + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM], + sizeof env->hi16_zmm_regs); +#endif return 0; } diff --git a/target-i386/machine.c b/target-i386/machine.c index 0dd49f0..708fc54 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = { #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) +static const VMStateDescription vmstate_zmmh_reg = { + .name = "zmmh_reg", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + 
VMSTATE_UINT64(YMM_Q(0), YMMReg), + VMSTATE_UINT64(YMM_Q(1), YMMReg), + VMSTATE_UINT64(YMM_Q(2), YMMReg), + VMSTATE_UINT64(YMM_Q(3), YMMReg), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg) + +#ifdef TARGET_X86_64 +static const VMStateDescription vmstate_hi16_zmm_reg = { + .name = "hi16_zmm_reg", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg) +#endif + static const VMStateDescription vmstate_bnd_regs = { .name = "bnd_regs", .version_id = 1, @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = { } }; +static bool avx512_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + unsigned int i; + + for (i = 0; i < NB_OPMASK_REGS; i++) { + if (env->opmask_regs[i]) { + return true; + } + } + + for (i = 0; i < CPU_NB_REGS; i++) { +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field)) + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) || + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) { + return true; + } +#ifdef TARGET_X86_64 +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field)) + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) || + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) || + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) || + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) { + return true; + } +#endif + } + + return false; +} + +static const VMStateDescription vmstate_avx512 = { + .name = "cpu/avx512", + .version_id = 1, + .minimum_version_id 
= 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS), +#ifdef TARGET_X86_64 + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS), +#endif + VMSTATE_END_OF_LIST() + } +}; + VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = { }, { .vmsd = &vmstate_msr_hyperv_time, .needed = hyperv_time_enable_needed, + }, { + .vmsd = &vmstate_avx512, + .needed = avx512_needed, } , { /* empty */ }
Add AVX512 feature bits, register definition and corresponding xsave/vmstate support. Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com> --- target-i386/cpu.c | 10 ++++-- target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++ target-i386/kvm.c | 19 +++++++++++ target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 2 deletions(-)