@@ -543,6 +543,11 @@ static const struct test avx512_bitalg_a
INSN(pshufbitqmb, 66, 0f38, 8f, vl, b, vl),
};
+static const struct test avx512_ifma_all[] = { /* IFMA Disp8 test cases */
+ INSN(pmadd52huq, 66, 0f38, b5, vl, q, vl),
+ INSN(pmadd52luq, 66, 0f38, b4, vl, q, vl),
+};
+
static const struct test avx512_vbmi_all[] = {
INSN(permb, 66, 0f38, 8d, vl, b, vl),
INSN(permi2b, 66, 0f38, 75, vl, b, vl),
@@ -929,6 +934,7 @@ void evex_disp8_test(void *instr, struct
#define cpu_has_avx512pf cpu_has_avx512f
RUN(avx512pf, 512);
RUN(avx512_bitalg, all);
+ RUN(avx512_ifma, all);
RUN(avx512_vbmi, all);
RUN(avx512_vbmi2, all);
RUN(avx512_vpopcntdq, all);
@@ -137,6 +137,7 @@ static inline bool xcr0_mask(uint64_t ma
#define cpu_has_bmi2 cp.feat.bmi2
#define cpu_has_avx512f (cp.feat.avx512f && xcr0_mask(0xe6))
#define cpu_has_avx512dq (cp.feat.avx512dq && xcr0_mask(0xe6))
+#define cpu_has_avx512_ifma (cp.feat.avx512_ifma && xcr0_mask(0xe6))
#define cpu_has_avx512er (cp.feat.avx512er && xcr0_mask(0xe6))
#define cpu_has_avx512cd (cp.feat.avx512cd && xcr0_mask(0xe6))
#define cpu_has_avx512bw (cp.feat.avx512bw && xcr0_mask(0xe6))
@@ -521,6 +521,7 @@ static const struct ext0f38_table {
[0xad] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0xae] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0xaf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+ [0xb4 ... 0xb5] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0xb6 ... 0xb8] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0xb9] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0xba] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
@@ -1907,6 +1908,7 @@ static bool vcpu_has(
#define vcpu_has_rdseed() vcpu_has( 7, EBX, 18, ctxt, ops)
#define vcpu_has_adx() vcpu_has( 7, EBX, 19, ctxt, ops)
#define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops)
+#define vcpu_has_avx512_ifma() vcpu_has( 7, EBX, 21, ctxt, ops)
#define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops)
#define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops)
#define vcpu_has_avx512pf() vcpu_has( 7, EBX, 26, ctxt, ops)
@@ -9470,6 +9472,12 @@ x86_emulate(
break;
}
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0xb4): /* vpmadd52luq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0xb5): /* vpmadd52huq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_ifma);
+ generate_exception_if(!evex.w, EXC_UD);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_EVEX_66(0x0f38, 0xc6):
case X86EMUL_OPC_EVEX_66(0x0f38, 0xc7):
{
@@ -102,6 +102,7 @@
#define cpu_has_avx512dq boot_cpu_has(X86_FEATURE_AVX512DQ)
#define cpu_has_rdseed boot_cpu_has(X86_FEATURE_RDSEED)
#define cpu_has_smap boot_cpu_has(X86_FEATURE_SMAP)
+#define cpu_has_avx512_ifma boot_cpu_has(X86_FEATURE_AVX512_IFMA)
#define cpu_has_avx512er boot_cpu_has(X86_FEATURE_AVX512ER)
#define cpu_has_avx512cd boot_cpu_has(X86_FEATURE_AVX512CD)
#define cpu_has_sha boot_cpu_has(X86_FEATURE_SHA)
@@ -212,7 +212,7 @@ XEN_CPUFEATURE(AVX512DQ, 5*32+17) /
XEN_CPUFEATURE(RDSEED, 5*32+18) /*A RDSEED instruction */
XEN_CPUFEATURE(ADX, 5*32+19) /*A ADCX, ADOX instructions */
XEN_CPUFEATURE(SMAP, 5*32+20) /*S Supervisor Mode Access Prevention */
-XEN_CPUFEATURE(AVX512IFMA, 5*32+21) /*A AVX-512 Integer Fused Multiply Add */
+XEN_CPUFEATURE(AVX512_IFMA, 5*32+21) /*A AVX-512 Integer Fused Multiply Add */
XEN_CPUFEATURE(CLFLUSHOPT, 5*32+23) /*A CLFLUSHOPT instruction */
XEN_CPUFEATURE(CLWB, 5*32+24) /*A CLWB instruction */
XEN_CPUFEATURE(AVX512PF, 5*32+26) /*A AVX-512 Prefetch Instructions */
@@ -262,7 +262,7 @@ def crunch_numbers(state):
# (which in practice depends on the EVEX prefix to encode) as well
# as mask registers, and the instructions themselves. All further
# AVX512 features are built on top of AVX512F
- AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
+ AVX512F: [AVX512DQ, AVX512_IFMA, AVX512PF, AVX512ER, AVX512CD,
AVX512BW, AVX512VL, AVX512_4VNNIW, AVX512_4FMAPS,
AVX512_VPOPCNTDQ],
Once again take the liberty to also correct the (public interface) name
of the AVX512_IFMA feature flag so that it matches the SDM, on the
assumption that no external consumer has actually been using that flag
so far.

As in a few cases before, since the insns here, and in particular their
memory access patterns, follow the usual scheme, I didn't think it was
necessary to add a contrived test specifically for them, beyond the
Disp8 scaling one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Reject EVEX.W=0.
v6: New.