@@ -657,7 +657,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
kvm_cpu_cap_init_scattered(CPUID_7_1_EAX,
- F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD)
+ F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
@@ -29,6 +29,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_AVX_VNNI KVM_X86_FEATURE(CPUID_7_1_EAX, 4)
#define X86_FEATURE_AVX512_BF16 KVM_X86_FEATURE(CPUID_7_1_EAX, 5)
#define X86_FEATURE_CMPCCXADD KVM_X86_FEATURE(CPUID_7_1_EAX, 7)
+#define X86_FEATURE_AMX_FP16 KVM_X86_FEATURE(CPUID_7_1_EAX, 21)
struct cpuid_reg {
u32 function;
Latest Intel platform Granite Rapids has introduced a new instruction - AMX-FP16, which performs dot-products of two FP16 tiles and accumulates the results into a packed single precision tile. This instrucion needs no additional enabling on top of the existing kernel AMX enabling. AMX-FP16 adds FP16 capability and also allows a FP16 GPU trained model to run faster without loss of accuracy or added SW overhead. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 21] This CPUID is exposed to user space. Besides, there is no other VMX control for this instruction. Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com> --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/reverse_cpuid.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-)