diff mbox series

[v8,26/50] x86emul: support remaining AVX512BW legacy-equivalent insns

Message ID 5C8B8482020000780021F217@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series x86emul: remaining AVX512 support | expand

Commit Message

Jan Beulich March 15, 2019, 10:54 a.m. UTC
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v8: Re-base.
v5: New.

Comments

Andrew Cooper May 21, 2019, 1:08 p.m. UTC | #1
On 15/03/2019 10:54, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -435,7 +435,10 @@ static const struct ext0f38_table {
>      disp8scale_t d8s:4;
>  } ext0f38_table[256] = {
>      [0x00] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
> -    [0x01 ... 0x0b] = { .simd_size = simd_packed_int },
> +    [0x01 ... 0x03] = { .simd_size = simd_packed_int },
> +    [0x04] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
> +    [0x05 ... 0x0b] = { .simd_size = simd_packed_int },
> +    [0x0b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },

It doesn't look as if you mean 0x0b twice here, although it's quite
possible that GCC elides it silently (Clang definitely won't).

~Andrew
Jan Beulich May 21, 2019, 1:34 p.m. UTC | #2
>>> On 21.05.19 at 15:08, <andrew.cooper3@citrix.com> wrote:
> On 15/03/2019 10:54, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -435,7 +435,10 @@ static const struct ext0f38_table {
>>      disp8scale_t d8s:4;
>>  } ext0f38_table[256] = {
>>      [0x00] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
>> -    [0x01 ... 0x0b] = { .simd_size = simd_packed_int },
>> +    [0x01 ... 0x03] = { .simd_size = simd_packed_int },
>> +    [0x04] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
>> +    [0x05 ... 0x0b] = { .simd_size = simd_packed_int },
>> +    [0x0b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
> 
> It doesn't look as if you mean 0x0b twice here, although it's quite
> possible that GCC elides it silently (Clang definitely won't).

Indeed, I failed to edit the upper bound of the range in question -
with gcc the last initializer for an array slot wins. Fixed for v9.

Jan
Andrew Cooper May 23, 2019, 4:10 p.m. UTC | #3
On 15/03/2019 10:54, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

With the identified issue fixed, Acked-by: Andrew Cooper
<andrew.cooper3@citrix.com>
diff mbox series

Patch

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -354,6 +354,7 @@  static const struct test avx512bw_all[]
     INSN(paddusb,     66,   0f, dc,    vl,    b, vl),
     INSN(paddusw,     66,   0f, dd,    vl,    w, vl),
     INSN(paddw,       66,   0f, fd,    vl,    w, vl),
+    INSN(palignr,     66, 0f3a, 0f,    vl,    b, vl),
     INSN(pavgb,       66,   0f, e0,    vl,    b, vl),
     INSN(pavgw,       66,   0f, e3,    vl,    w, vl),
     INSN(pbroadcastb, 66, 0f38, 78,    el,    b, el),
@@ -369,6 +370,7 @@  static const struct test avx512bw_all[]
     INSN(permw,       66, 0f38, 8d,    vl,    w, vl),
     INSN(permi2w,     66, 0f38, 75,    vl,    w, vl),
     INSN(permt2w,     66, 0f38, 7d,    vl,    w, vl),
+    INSN(pmaddubsw,   66, 0f38, 04,    vl,    b, vl),
     INSN(pmaddwd,     66,   0f, f5,    vl,    w, vl),
     INSN(pmaxsb,      66, 0f38, 3c,    vl,    b, vl),
     INSN(pmaxsw,      66,   0f, ee,    vl,    w, vl),
@@ -386,6 +388,7 @@  static const struct test avx512bw_all[]
 //       pmovw2m,     f3, 0f38, 29,           w
     INSN(pmovwb,      f3, 0f38, 30,    vl_2,  b, vl),
     INSN(pmovzxbw,    66, 0f38, 30,    vl_2,  b, vl),
+    INSN(pmulhrsw,    66, 0f38, 0b,    vl,    w, vl),
     INSN(pmulhuw,     66,   0f, e4,    vl,    w, vl),
     INSN(pmulhw,      66,   0f, e5,    vl,    w, vl),
     INSN(pmullw,      66,   0f, d5,    vl,    w, vl),
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -587,6 +587,7 @@  static inline vec_t movlhps(vec_t x, vec
 #  if VEC_SIZE == 16
 #   define interleave_hi(x, y) ((vec_t)B(punpckhbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))
 #   define interleave_lo(x, y) ((vec_t)B(punpcklbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))
+#   define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 8, (vdi_t)undef(), ~0))
 #   define swap(x) ((vec_t)B(pshufb, _mask, (vqi_t)(x), (vqi_t)(inv - 1), (vqi_t)undef(), ~0))
 #  elif defined(__AVX512VBMI__)
 #   define interleave_hi(x, y) ((vec_t)B(vpermi2varqi, _mask, (vqi_t)(x), interleave_hi, (vqi_t)(y), ~0))
@@ -615,6 +616,7 @@  static inline vec_t movlhps(vec_t x, vec
 #  if VEC_SIZE == 16
 #   define interleave_hi(x, y) ((vec_t)B(punpckhwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0))
 #   define interleave_lo(x, y) ((vec_t)B(punpcklwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0))
+#   define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 16, (vdi_t)undef(), ~0))
 #   define swap(x) ((vec_t)B(pshufd, _mask, \
                              (vsi_t)B(pshufhw, _mask, \
                                       B(pshuflw, _mask, (vhi_t)(x), 0b00011011, (vhi_t)undef(), ~0), \
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -402,9 +402,12 @@  OVR(packssdw);
 OVR(packsswb);
 OVR(packusdw);
 OVR(packuswb);
+OVR(palignr);
+OVR(pmaddubsw);
 OVR(pmaddwd);
 OVR(pmovsxbw);
 OVR(pmovzxbw);
+OVR(pmulhrsw);
 OVR(pmulhuw);
 OVR(pmulhw);
 OVR(pmullw);
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -435,7 +435,10 @@  static const struct ext0f38_table {
     disp8scale_t d8s:4;
 } ext0f38_table[256] = {
     [0x00] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
-    [0x01 ... 0x0b] = { .simd_size = simd_packed_int },
+    [0x01 ... 0x03] = { .simd_size = simd_packed_int },
+    [0x04] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+    [0x05 ... 0x0b] = { .simd_size = simd_packed_int },
+    [0x0b] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x0c ... 0x0d] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
     [0x0e ... 0x0f] = { .simd_size = simd_packed_fp },
     [0x10 ... 0x12] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
@@ -534,7 +537,8 @@  static const struct ext0f3a_table {
     [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
     [0x0a ... 0x0b] = { .simd_size = simd_scalar_opc, .d8s = d8s_dq },
     [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
-    [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
+    [0x0e] = { .simd_size = simd_packed_int },
+    [0x0f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x14] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 0 },
     [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 },
     [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 },
@@ -6899,6 +6903,7 @@  x86_emulate(
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x00): /* vpshufb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x04): /* vpmaddubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         fault_suppression = false;
         /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
@@ -6917,6 +6922,7 @@  x86_emulate(
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x0b): /* vpmulhrsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x1c): /* vpabsb [xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x1d): /* vpabsw [xyz]mm/mem,[xyz]mm{k} */
         host_and_vcpu_must_have(avx512bw);
@@ -9374,6 +9380,10 @@  x86_emulate(
         insn_bytes = PFX_BYTES + 4;
         break;
 
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        fault_suppression = false;
+        goto avx512bw_imm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
     case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */
     case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */