diff mbox series

x86emul: support AVX512_VP2INTERSECT insns

Message ID 08083899-7348-63d2-1f28-0932e2295d64@suse.com (mailing list archive)
State New, archived
Headers show
Series x86emul: support AVX512_VP2INTERSECT insns | expand

Commit Message

Jan Beulich July 21, 2020, 10:23 a.m. UTC
The standard memory access pattern once again should allow us to go
without a test harness addition beyond the EVEX Disp8-scaling one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
(SDE: -tgl)

Comments

Andrew Cooper July 21, 2020, 10:32 a.m. UTC | #1
On 21/07/2020 11:23, Jan Beulich wrote:
> --- a/tools/misc/xen-cpuid.c
> +++ b/tools/misc/xen-cpuid.c
> @@ -160,7 +160,7 @@ static const char *const str_7d0[32] =
>      [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
>      [ 4] = "fsrm",
>  
> -    /*  8 */                [ 9] = "srbds-ctrl",
> +    [ 8] = "avx512_vp2intersect", [ 9] = "srbds-ctrl",
>      [10] = "md-clear",
>      /* 12 */                [13] = "tsx-force-abort",
>      [14] = "serialize",

Are we using underscores or dashes?  I realise it is already
inconsistent, but this is a debugging tool only, and we can change our
minds.

~Andrew
Jan Beulich July 21, 2020, 10:47 a.m. UTC | #2
On 21.07.2020 12:32, Andrew Cooper wrote:
> On 21/07/2020 11:23, Jan Beulich wrote:
>> --- a/tools/misc/xen-cpuid.c
>> +++ b/tools/misc/xen-cpuid.c
>> @@ -160,7 +160,7 @@ static const char *const str_7d0[32] =
>>      [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
>>      [ 4] = "fsrm",
>>  
>> -    /*  8 */                [ 9] = "srbds-ctrl",
>> +    [ 8] = "avx512_vp2intersect", [ 9] = "srbds-ctrl",
>>      [10] = "md-clear",
>>      /* 12 */                [13] = "tsx-force-abort",
>>      [14] = "serialize",
> 
> Are we using underscores or dashes?  I realise it is already
> inconsistent, but this is a debugging tool only, and we can change our
> minds.

I've switched this one to use a dash. Want me to also switch others
(in a separate patch)?

Jan
Andrew Cooper July 21, 2020, 10:48 a.m. UTC | #3
On 21/07/2020 11:47, Jan Beulich wrote:
> On 21.07.2020 12:32, Andrew Cooper wrote:
>> On 21/07/2020 11:23, Jan Beulich wrote:
>>> --- a/tools/misc/xen-cpuid.c
>>> +++ b/tools/misc/xen-cpuid.c
>>> @@ -160,7 +160,7 @@ static const char *const str_7d0[32] =
>>>      [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
>>>      [ 4] = "fsrm",
>>>  
>>> -    /*  8 */                [ 9] = "srbds-ctrl",
>>> +    [ 8] = "avx512_vp2intersect", [ 9] = "srbds-ctrl",
>>>      [10] = "md-clear",
>>>      /* 12 */                [13] = "tsx-force-abort",
>>>      [14] = "serialize",
>> Are we using underscores or dashes?  I realise it is already
>> inconsistent, but this is a debugging tool only, and we can change our
>> minds.
> I've switched this one to use a dash.

Ok.  Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

> Want me to also switch others (in a separate patch)?

Probably best, yes.

Thanks,

~Andrew
diff mbox series

Patch

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -214,6 +214,7 @@  int libxl_cpuid_parse_config(libxl_cpuid
 
         {"avx512-4vnniw",0x00000007,  0, CPUID_REG_EDX,  2,  1},
         {"avx512-4fmaps",0x00000007,  0, CPUID_REG_EDX,  3,  1},
+        {"avx512-vp2intersect",0x00000007,0,CPUID_REG_EDX,8, 1},
         {"srbds-ctrl",   0x00000007,  0, CPUID_REG_EDX,  9,  1},
         {"md-clear",     0x00000007,  0, CPUID_REG_EDX, 10,  1},
         {"serialize",    0x00000007,  0, CPUID_REG_EDX, 14,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -160,7 +160,7 @@  static const char *const str_7d0[32] =
     [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
     [ 4] = "fsrm",
 
-    /*  8 */                [ 9] = "srbds-ctrl",
+    [ 8] = "avx512_vp2intersect", [ 9] = "srbds-ctrl",
     [10] = "md-clear",
     /* 12 */                [13] = "tsx-force-abort",
     [14] = "serialize",
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -593,6 +593,10 @@  static const struct test avx512_vnni_all
     INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
 };
 
+static const struct test avx512_vp2intersect_all[] = {
+    INSN(p2intersect, f2, 0f38, 68, vl, dq, vl)
+};
+
 static const struct test avx512_vpopcntdq_all[] = {
     INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
 };
@@ -996,6 +1000,7 @@  void evex_disp8_test(void *instr, struct
     RUN(avx512_vbmi, all);
     RUN(avx512_vbmi2, all);
     RUN(avx512_vnni, all);
+    RUN(avx512_vp2intersect, all);
     RUN(avx512_vpopcntdq, all);
 
     if ( cpu_has_avx512f )
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -168,6 +168,7 @@  static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_movdir64b  cp.feat.movdir64b
 #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
 #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
+#define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && xcr0_mask(0xe6))
 #define cpu_has_serialize  cp.feat.serialize
 #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -488,6 +488,7 @@  static const struct ext0f38_table {
     [0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw },
     [0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw },
     [0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x68] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x75 ... 0x76] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x77] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
@@ -2005,6 +2006,7 @@  amd_like(const struct x86_emulate_ctxt *
 #define vcpu_has_enqcmd()      (ctxt->cpuid->feat.enqcmd)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
+#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
 #define vcpu_has_serialize()   (ctxt->cpuid->feat.serialize)
 #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
 
@@ -9545,6 +9547,12 @@  x86_emulate(
         }
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x68): /* vp2intersect{d,q} [xyz]mm/mem,[xyz]mm,k+1 */
+        host_and_vcpu_must_have(avx512_vp2intersect);
+        generate_exception_if(evex.opmsk || !evex.r || !evex.R, EXC_UD);
+        op_bytes = 16 << evex.lr;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x70): /* vpshldvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x72): /* vpshrdvw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         generate_exception_if(!evex.w, EXC_UD);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -128,6 +128,7 @@ 
 /* CPUID level 0x00000007:0.edx */
 #define cpu_has_avx512_4vnniw   boot_cpu_has(X86_FEATURE_AVX512_4VNNIW)
 #define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
+#define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
 #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 #define cpu_has_serialize       boot_cpu_has(X86_FEATURE_SERIALIZE)
 
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -259,6 +259,7 @@  XEN_CPUFEATURE(SSB_NO,        8*32+26) /
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
 XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A  AVX512 Neural Network Instructions */
 XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single Precision */
+XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a  VP2INTERSECT{D,Q} insns */
 XEN_CPUFEATURE(SRBDS_CTRL,    9*32+ 9) /*   MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
 XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /*A  VERW clears microarchitectural buffers */
 XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -260,7 +260,7 @@  def crunch_numbers(state):
         # AVX512 features are built on top of AVX512F
         AVX512F: [AVX512DQ, AVX512_IFMA, AVX512PF, AVX512ER, AVX512CD,
                   AVX512BW, AVX512VL, AVX512_4VNNIW, AVX512_4FMAPS,
-                  AVX512_VNNI, AVX512_VPOPCNTDQ],
+                  AVX512_VNNI, AVX512_VPOPCNTDQ, AVX512_VP2INTERSECT],
 
         # AVX512 extensions acting on vectors of bytes/words are made
         # dependents of AVX512BW (as to requiring wider than 16-bit mask