@@ -214,6 +214,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
{"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1},
{"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1},
+ {"avx512-vp2intersect",0x00000007,0,CPUID_REG_EDX,8, 1},
{"srbds-ctrl", 0x00000007, 0, CPUID_REG_EDX, 9, 1},
{"md-clear", 0x00000007, 0, CPUID_REG_EDX, 10, 1},
{"serialize", 0x00000007, 0, CPUID_REG_EDX, 14, 1},
@@ -160,7 +160,7 @@ static const char *const str_7d0[32] =
[ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
[ 4] = "fsrm",
- /* 8 */ [ 9] = "srbds-ctrl",
+ [ 8] = "avx512_vp2intersect", [ 9] = "srbds-ctrl",
[10] = "md-clear",
/* 12 */ [13] = "tsx-force-abort",
[14] = "serialize",
@@ -593,6 +593,10 @@ static const struct test avx512_vnni_all
INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
};
+static const struct test avx512_vp2intersect_all[] = {
+ INSN(p2intersect, f2, 0f38, 68, vl, dq, vl)
+};
+
static const struct test avx512_vpopcntdq_all[] = {
INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
};
@@ -996,6 +1000,7 @@ void evex_disp8_test(void *instr, struct
RUN(avx512_vbmi, all);
RUN(avx512_vbmi2, all);
RUN(avx512_vnni, all);
+ RUN(avx512_vp2intersect, all);
RUN(avx512_vpopcntdq, all);
if ( cpu_has_avx512f )
@@ -168,6 +168,7 @@ static inline bool xcr0_mask(uint64_t ma
#define cpu_has_movdir64b cp.feat.movdir64b
#define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
#define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
+#define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && xcr0_mask(0xe6))
#define cpu_has_serialize cp.feat.serialize
#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
@@ -488,6 +488,7 @@ static const struct ext0f38_table {
[0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw },
[0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw },
[0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+ [0x68] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x75 ... 0x76] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x77] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
@@ -2005,6 +2006,7 @@ amd_like(const struct x86_emulate_ctxt *
#define vcpu_has_enqcmd() (ctxt->cpuid->feat.enqcmd)
#define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
#define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
+#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
#define vcpu_has_serialize() (ctxt->cpuid->feat.serialize)
#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
@@ -9545,6 +9547,12 @@ x86_emulate(
}
goto simd_zmm;
+ case X86EMUL_OPC_EVEX_F2(0x0f38, 0x68): /* vp2intersect{d,q} [xyz]mm/mem,[xyz]mm,k+1 */
+ host_and_vcpu_must_have(avx512_vp2intersect);
+ generate_exception_if(evex.opmsk || !evex.r || !evex.R, EXC_UD);
+ op_bytes = 16 << evex.lr;
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_EVEX_66(0x0f38, 0x70): /* vpshldvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f38, 0x72): /* vpshrdvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
generate_exception_if(!evex.w, EXC_UD);
@@ -128,6 +128,7 @@
/* CPUID level 0x00000007:0.edx */
#define cpu_has_avx512_4vnniw boot_cpu_has(X86_FEATURE_AVX512_4VNNIW)
#define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
+#define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
#define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
#define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
@@ -259,6 +259,7 @@ XEN_CPUFEATURE(SSB_NO, 8*32+26) /
/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */
+XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */
XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */
XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
@@ -260,7 +260,7 @@ def crunch_numbers(state):
# AVX512 features are built on top of AVX512F
AVX512F: [AVX512DQ, AVX512_IFMA, AVX512PF, AVX512ER, AVX512CD,
AVX512BW, AVX512VL, AVX512_4VNNIW, AVX512_4FMAPS,
- AVX512_VNNI, AVX512_VPOPCNTDQ],
+ AVX512_VNNI, AVX512_VPOPCNTDQ, AVX512_VP2INTERSECT],
# AVX512 extensions acting on vectors of bytes/words are made
# dependents of AVX512BW (as to requiring wider than 16-bit mask
The standard memory access pattern once again should allow us to go
without a test harness addition beyond the EVEX Disp8-scaling one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
(SDE: -tgl)