@@ -216,6 +216,9 @@ static void recalculate_xstate(struct cp
if ( p->feat.pku )
xstates |= X86_XCR0_PKRU;
+ if ( p->feat.amx_tile )
+ xstates |= X86_XCR0_TILE_CFG | X86_XCR0_TILE_DATA;
+
/* Subleaf 0 */
p->xstate.max_size =
xstate_uncompressed_size(xstates & ~XSTATE_XSAVES_ONLY);
@@ -35,7 +35,8 @@ extern uint32_t mxcsr_mask;
XSTATE_NONLAZY)
#define XSTATE_ALL (~(1ULL << 63))
-#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU)
+#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU | \
+ X86_XCR0_TILE_CFG | X86_XCR0_TILE_DATA)
#define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)
#define XSTATE_XSAVES_ONLY 0
#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
@@ -278,8 +278,10 @@ XEN_CPUFEATURE(HYBRID, 9*32+15) /
XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */
XEN_CPUFEATURE(ARCH_LBR, 9*32+19) /* Architectural Last Branch Record */
XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */
+XEN_CPUFEATURE(AMX_BF16, 9*32+22) /* AMX BFloat16 instruction */
XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /*A AVX512 FP16 instructions */
XEN_CPUFEATURE(AMX_TILE, 9*32+24) /* AMX Tile architecture */
+XEN_CPUFEATURE(AMX_INT8, 9*32+25) /* AMX 8-bit integer instructions */
XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */
XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */
XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. */
@@ -297,6 +299,7 @@ XEN_CPUFEATURE(FZRM, 10*32+10) /
XEN_CPUFEATURE(FSRS, 10*32+11) /*A Fast Short REP STOSB */
XEN_CPUFEATURE(FSRCS, 10*32+12) /*A Fast Short REP CMPSB/SCASB */
XEN_CPUFEATURE(WRMSRNS, 10*32+19) /*S WRMSR Non-Serialising */
+XEN_CPUFEATURE(AMX_FP16, 10*32+21) /* AMX FP16 instruction */
XEN_CPUFEATURE(AVX_IFMA, 10*32+23) /*A AVX-IFMA Instructions */
/* AMD-defined CPU features, CPUID level 0x80000021.eax, word 11 */
@@ -331,6 +334,7 @@ XEN_CPUFEATURE(MCDT_NO, 13*32
/* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */
XEN_CPUFEATURE(AVX_VNNI_INT8, 15*32+ 4) /*A AVX-VNNI-INT8 Instructions */
XEN_CPUFEATURE(AVX_NE_CONVERT, 15*32+ 5) /*A AVX-NE-CONVERT Instructions */
+XEN_CPUFEATURE(AMX_COMPLEX, 15*32+ 8) /* AMX Complex Instructions */
XEN_CPUFEATURE(AVX_VNNI_INT16, 15*32+10) /*A AVX-VNNI-INT16 Instructions */
XEN_CPUFEATURE(PREFETCHI, 15*32+14) /*A PREFETCHIT{0,1} Instructions */
XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */
@@ -252,7 +252,7 @@ def crunch_numbers(state):
# instruction groups which are specified to require XSAVE for state
# management.
XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
- AVX, MPX, PKU, LWP],
+ AVX, MPX, PKU, AMX_TILE, LWP],
# AVX is taken to mean hardware support for 256bit registers (which in
# practice depends on the VEX prefix to encode), and the instructions
@@ -274,7 +274,7 @@ def crunch_numbers(state):
# superpages, PCID and PKU are only available in 4 level paging.
# NO_LMSL indicates the absense of Long Mode Segment Limits, which
# have been dropped in hardware.
- LM: [CX16, PCID, LAHF_LM, PAGE1GB, PKU, NO_LMSL],
+ LM: [CX16, PCID, LAHF_LM, PAGE1GB, PKU, NO_LMSL, AMX_TILE],
# AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the
# standard 3DNow in the earlier K6 processors.
@@ -338,6 +338,11 @@ def crunch_numbers(state):
# The behaviour described by RRSBA depend on eIBRS being active.
EIBRS: [RRSBA],
+
+ # AMX-TILE means hardware support for tile registers and general non-
+ # computational instructions. All further AMX features are built on top
+ # of AMX-TILE.
+ AMX_TILE: [AMX_BF16, AMX_INT8, AMX_FP16, AMX_COMPLEX],
}
deep_features = tuple(sorted(deps.keys()))
These being controlled by XCR0, enabling support is relatively
straightforward. Note, however, that there won't be any use of them until
their dependent ISA extension CPUID flags are exposed, not least because
recalculate_xstate() handles the dependencies in a somewhat reverse
manner.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Intended to replace part of (and go beyond) "x86/cpufeatures: Add new
cpuid features in SPR to featureset".
---
v4: Add AMX-FP16 and AMX-COMPLEX. Add dependency on LM. Re-base. Split off
    of AMX series.
v3: Add new states to XSTATE_NONLAZY.
v2: New.