diff mbox series

[v8,02/50] x86emul: support AVX512{F, BW, DQ} extract insns

Message ID: 5C8B8045020000780021F119@prv1-mh.provo.novell.com (mailing list archive)
State: New, archived
Headers show
Series: x86emul: remaining AVX512 support | expand

Commit Message

Jan Beulich March 15, 2019, 10:36 a.m. UTC
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Re-base.
v4: Make use of d8s_dq64.
v3: New.

Comments

Andrew Cooper March 15, 2019, 5:51 p.m. UTC | #1
On 15/03/2019 10:36, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff mbox series

Patch

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -212,6 +212,7 @@  static const struct test avx512f_all[] =
 };
 
 static const struct test avx512f_128[] = {
+    INSN(extractps, 66, 0f3a, 17, el,    d, el),
     INSN(mov,       66,   0f, 6e, el, dq64, el),
     INSN(mov,       66,   0f, 7e, el, dq64, el),
     INSN(movq,      f3,   0f, 7e, el,    q, el),
@@ -221,10 +222,14 @@  static const struct test avx512f_128[] =
 static const struct test avx512f_no128[] = {
     INSN(broadcastf32x4, 66, 0f38, 1a, el_4,  d, vl),
     INSN(broadcastsd,    66, 0f38, 19, el,    q, el),
+    INSN(extractf32x4,   66, 0f3a, 19, el_4,  d, vl),
+    INSN(extracti32x4,   66, 0f3a, 39, el_4,  d, vl),
 };
 
 static const struct test avx512f_512[] = {
     INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
+    INSN(extractf64x4,   66, 0f3a, 1b, el_4, q, vl),
+    INSN(extracti64x4,   66, 0f3a, 3b, el_4, q, vl),
 };
 
 static const struct test avx512bw_all[] = {
@@ -280,6 +285,12 @@  static const struct test avx512bw_all[]
     INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
 };
 
+static const struct test avx512bw_128[] = {
+    INSN(pextrb, 66, 0f3a, 14, el, b, el),
+//       pextrw, 66,   0f, c5,     w
+    INSN(pextrw, 66, 0f3a, 15, el, w, el),
+};
+
 static const struct test avx512dq_all[] = {
     INSN_PFP(and,              0f, 54),
     INSN_PFP(andn,             0f, 55),
@@ -288,13 +299,21 @@  static const struct test avx512dq_all[]
     INSN_PFP(xor,              0f, 57),
 };
 
+static const struct test avx512dq_128[] = {
+    INSN(pextr, 66, 0f3a, 16, el, dq64, el),
+};
+
 static const struct test avx512dq_no128[] = {
     INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
     INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
+    INSN(extractf64x2,   66, 0f3a, 19, el_2, q, vl),
+    INSN(extracti64x2,   66, 0f3a, 39, el_2, q, vl),
 };
 
 static const struct test avx512dq_512[] = {
     INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
+    INSN(extractf32x8,   66, 0f3a, 1b, el_8, d, vl),
+    INSN(extracti32x8,   66, 0f3a, 3b, el_8, d, vl),
 };
 
 static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
@@ -632,7 +651,9 @@  void evex_disp8_test(void *instr, struct
     RUN(avx512f, no128);
     RUN(avx512f, 512);
     RUN(avx512bw, all);
+    RUN(avx512bw, 128);
     RUN(avx512dq, all);
+    RUN(avx512dq, 128);
     RUN(avx512dq, no128);
     RUN(avx512dq, 512);
 }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -512,9 +512,13 @@  static const struct ext0f3a_table {
     [0x0a ... 0x0b] = { .simd_size = simd_scalar_opc },
     [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
     [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
-    [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
+    [0x14] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 0 },
+    [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 },
+    [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 },
+    [0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 2 },
     [0x18] = { .simd_size = simd_128 },
-    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
     [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x20] = { .simd_size = simd_none },
@@ -523,7 +527,8 @@  static const struct ext0f3a_table {
     [0x25] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
     [0x38] = { .simd_size = simd_128 },
-    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 },
+    [0x3b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
     [0x42] = { .simd_size = simd_packed_int },
@@ -2676,6 +2681,8 @@  x86_decode_0f3a(
      ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
     case X86EMUL_OPC_VEX_66(0, 0x14)
      ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
+    case X86EMUL_OPC_EVEX_66(0, 0x14)
+     ... X86EMUL_OPC_EVEX_66(0, 0x17): /* vpextr*, vextractps */
     case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
         break;
 
@@ -8878,9 +8885,9 @@  x86_emulate(
         opc[0] = b;
         /* Convert memory/GPR operand to (%rAX). */
         rex_prefix &= ~REX_B;
-        vex.b = 1;
+        evex.b = vex.b = 1;
         if ( !mode_64bit() )
-            vex.w = 0;
+            evex.w = vex.w = 0;
         opc[1] = modrm & 0x38;
         opc[2] = imm1;
         opc[3] = 0xc3;
@@ -8890,7 +8897,10 @@  x86_emulate(
             --opc;
         }
 
-        copy_REX_VEX(opc, rex_prefix, vex);
+        if ( evex_encoded() )
+            copy_EVEX(opc, evex);
+        else
+            copy_REX_VEX(opc, rex_prefix, vex);
         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
         put_stub(stub);
 
@@ -8915,6 +8925,52 @@  x86_emulate(
         opc = init_prefixes(stub);
         goto pextr;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
+        generate_exception_if(ea.type != OP_REG, EXC_UD);
+        /* Convert to alternative encoding: We want to use a memory operand. */
+        evex.opcx = ext_0f3a;
+        b = 0x15;
+        modrm <<= 3;
+        evex.r = evex.b;
+        evex.R = evex.x;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
+        generate_exception_if((evex.lr || evex.reg != 0xf || !evex.RX ||
+                               evex.opmsk || evex.brs),
+                              EXC_UD);
+        if ( !(b & 2) )
+            host_and_vcpu_must_have(avx512bw);
+        else if ( !(b & 1) )
+            host_and_vcpu_must_have(avx512dq);
+        else
+            host_and_vcpu_must_have(avx512f);
+        get_fpu(X86EMUL_FPU_zmm);
+        opc = init_evex(stub);
+        goto pextr;
+
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x19): /* vextractf32x4 $imm8,{y,z}mm,xmm/m128{k} */
+                                            /* vextractf64x2 $imm8,{y,z}mm,xmm/m128{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x39): /* vextracti32x4 $imm8,{y,z}mm,xmm/m128{k} */
+                                            /* vextracti64x2 $imm8,{y,z}mm,xmm/m128{k} */
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        generate_exception_if(!evex.lr || evex.brs, EXC_UD);
+        fault_suppression = false;
+        goto avx512f_imm8_no_sae;
+
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1b): /* vextractf32x8 $imm8,zmm,ymm/m256{k} */
+                                            /* vextractf64x4 $imm8,zmm,ymm/m256{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3b): /* vextracti32x8 $imm8,zmm,ymm/m256{k} */
+                                            /* vextracti64x4 $imm8,zmm,ymm/m256{k} */
+        if ( !evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        generate_exception_if(evex.lr != 2 || evex.brs, EXC_UD);
+        fault_suppression = false;
+        goto avx512f_imm8_no_sae;
+
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */
     {
         uint32_t mxcsr;