@@ -3362,7 +3362,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
switch ( input )
{
- unsigned int _ecx, _edx;
+ unsigned int _ebx, _ecx, _edx;
case 0x1:
/* Fix up VLAPIC details. */
@@ -3443,6 +3443,51 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
switch ( count )
{
case 0:
+ {
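+ /*
+  * Build the xfeature mask visible to the guest, and compute the xsave
+  * area size those components require.
+  */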
+ uint64_t xfeature_mask = XSTATE_FP_SSE;
+ uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) )
+ {
+ xfeature_mask |= XSTATE_YMM;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_YMM] +
+ xstate_sizes[_XSTATE_YMM]);
+ }
+
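+ /* Leaf 7, subleaf 0 (ECX carries the subleaf on input). */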
+ _ecx = 0;
+ hvm_cpuid(7, NULL, &_ebx, &_ecx, NULL);
+
+ if ( _ebx & cpufeat_mask(X86_FEATURE_MPX) )
+ {
+ xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_BNDCSR] +
+ xstate_sizes[_XSTATE_BNDCSR]);
+ }
+
+ if ( _ebx & cpufeat_mask(X86_FEATURE_PKU) )
+ {
+ xfeature_mask |= XSTATE_PKRU;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_PKRU] +
+ xstate_sizes[_XSTATE_PKRU]);
+ }
+
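+ /* LWP is an AMD feature, reported in extended leaf 0x80000001. */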
+ hvm_cpuid(0x80000001, NULL, NULL, &_ecx, NULL);
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) )
+ {
+ xfeature_mask |= XSTATE_LWP;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_LWP] +
+ xstate_sizes[_XSTATE_LWP]);
+ }
+
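+ /* Split the 64-bit mask across EAX (low) and EDX (high); ECX gets the size. */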
+ *eax = (uint32_t)xfeature_mask;
+ *edx = (uint32_t)(xfeature_mask >> 32);
+ *ecx = xstate_size;
+
/*
* Always read CPUID[0xD,0].EBX from hardware, rather than domain
* policy. It varies with enabled xstate, and the correct xcr0 is
@@ -3450,6 +3495,8 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
* in context.
*/
cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
break;
+ }
+
case 1:
*eax &= hvm_featureset[FEATURESET_Da1];
@@ -3463,7 +3510,9 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
}
else
- *ebx = *ecx = *edx = 0;
+ *ebx = 0;
+
+ *ecx = *edx = 0;
break;
}
break;
@@ -928,7 +928,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
switch ( leaf )
{
- uint32_t tmp;
+ uint32_t tmp, _ecx;
case 0x00000001:
c &= pv_featureset[FEATURESET_1c];
@@ -1087,19 +1087,48 @@ void pv_cpuid(struct cpu_user_regs *regs)
break;
case XSTATE_CPUID:
- if ( !((!is_control_domain(currd) && !is_hardware_domain(currd)
- ? ({
- uint32_t ecx;
-
- domain_cpuid(currd, 1, 0, &tmp, &tmp, &ecx, &tmp);
- ecx & pv_featureset[FEATURESET_1c];
- })
- : cpuid_ecx(1)) & cpufeat_mask(X86_FEATURE_XSAVE)) ||
- subleaf >= 63 )
+
+ if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+ domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
+ else
+ _ecx = cpuid_ecx(1);
+ _ecx &= pv_featureset[FEATURESET_1c];
+
+ if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
goto unsupported;
switch ( subleaf )
{
case 0:
+ {
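+ /*
+  * As for HVM above: build the guest-visible xfeature mask and compute
+  * the matching xsave area size.
+  */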
+ uint64_t xfeature_mask = XSTATE_FP_SSE;
+ uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) )
+ {
+ xfeature_mask |= XSTATE_YMM;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_YMM] +
+ xstate_sizes[_XSTATE_YMM]);
+ }
+
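+ /* Re-read ECX from AMD's extended leaf 0x80000001 to check for LWP. */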
+ if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+ domain_cpuid(currd, 0x80000001, 0, &tmp, &tmp, &_ecx, &tmp);
+ else
+ _ecx = cpuid_ecx(0x80000001);
+ _ecx &= pv_featureset[FEATURESET_e1c];
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) )
+ {
+ xfeature_mask |= XSTATE_LWP;
+ xstate_size = MAX(xstate_size,
+ xstate_offsets[_XSTATE_LWP] +
+ xstate_sizes[_XSTATE_LWP]);
+ }
+
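+ /* EAX:EDX hold the low and high halves of the mask; ECX the size. */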
+ a = (uint32_t)xfeature_mask;
+ d = (uint32_t)(xfeature_mask >> 32);
+ c = xstate_size;
+
/*
* Always read CPUID.0xD[ECX=0].EBX from hardware, rather than
* domain policy. It varies with enabled xstate, and the correct
@@ -1108,6 +1137,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
* xcr0 is in context.
*/
if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
cpuid_count(leaf, subleaf, &tmp, &b, &tmp, &tmp);
break;
+ }
case 1:
a &= pv_featureset[FEATURESET_Da1];
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index a0cfcc2..1fd1ce8 100644
@@ -24,7 +24,7 @@ static u32 __read_mostly xsave_cntxt_size;
/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
u64 __read_mostly xfeature_mask;
-static unsigned int *__read_mostly xstate_offsets;
+unsigned int *__read_mostly xstate_offsets;
unsigned int *__read_mostly xstate_sizes;
u64 __read_mostly xstate_align;
static unsigned int __read_mostly xstate_features;
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index 4535354..51a9ed4 100644
@@ -26,16 +26,27 @@
#define XSAVE_HDR_OFFSET FXSAVE_SIZE
#define XSTATE_AREA_MIN_SIZE (FXSAVE_SIZE + XSAVE_HDR_SIZE)
-#define XSTATE_FP (1ULL << 0)
-#define XSTATE_SSE (1ULL << 1)
-#define XSTATE_YMM (1ULL << 2)
-#define XSTATE_BNDREGS (1ULL << 3)
-#define XSTATE_BNDCSR (1ULL << 4)
-#define XSTATE_OPMASK (1ULL << 5)
-#define XSTATE_ZMM (1ULL << 6)
-#define XSTATE_HI_ZMM (1ULL << 7)
-#define XSTATE_PKRU (1ULL << 9)
-#define XSTATE_LWP (1ULL << 62) /* AMD lightweight profiling */
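+/*
+ * Each _XSTATE_* constant is a bit position; it also indexes the
+ * xstate_offsets[] and xstate_sizes[] arrays.
+ */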
+#define _XSTATE_FP 0
+#define XSTATE_FP (1ULL << _XSTATE_FP)
+#define _XSTATE_SSE 1
+#define XSTATE_SSE (1ULL << _XSTATE_SSE)
+#define _XSTATE_YMM 2
+#define XSTATE_YMM (1ULL << _XSTATE_YMM)
+#define _XSTATE_BNDREGS 3
+#define XSTATE_BNDREGS (1ULL << _XSTATE_BNDREGS)
+#define _XSTATE_BNDCSR 4
+#define XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR)
+#define _XSTATE_OPMASK 5
+#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK)
+#define _XSTATE_ZMM 6
+#define XSTATE_ZMM (1ULL << _XSTATE_ZMM)
+#define _XSTATE_HI_ZMM 7
+#define XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM)
+#define _XSTATE_PKRU 9
+#define XSTATE_PKRU (1ULL << _XSTATE_PKRU)
+#define _XSTATE_LWP 62
+#define XSTATE_LWP (1ULL << _XSTATE_LWP)
+
#define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE)
#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \
XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY)
@@ -51,6 +62,7 @@