
[v8,20/50] x86emul: support AVX512F legacy-equivalent packed int/FP conversion insns

Message ID 5C8B82D6020000780021F1CA@prv1-mh.provo.novell.com
State New, archived
Series x86emul: remaining AVX512 support

Commit Message

Jan Beulich March 15, 2019, 10:47 a.m. UTC
... including the two AVX512DQ forms, which share the same encodings and
differ only in having EVEX.W set.

VCVTDQ2PD, sharing its main opcode with others, needs a "manual"
override of disp8scale.
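
For reference, EVEX-encoded memory operands use the compressed disp8*N
displacement form, where the emulator's disp8scale is log2(N). A minimal
sketch of how that scale is applied (helper name and values are
illustrative, not the emulator's actual code):

#include <stdint.h>

/* Expand an EVEX compressed 8-bit displacement; disp8scale is log2(N). */
static int32_t effective_disp(int8_t disp8, unsigned int disp8scale)
{
    return disp8 * (1 << disp8scale);
}

A full-width 512-bit operation would use disp8scale == 6 (N == 64), while
vcvtdq2pd with a 512-bit destination reads only a 256-bit memory operand,
so N == 32 and disp8scale needs to be 5; that is what the --disp8scale
adjustment for the F3-prefixed 0xe6 opcode in x86_decode() achieves.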

The simd_size changes for the twobyte_table[] entries are benign to
pre-existing code, but allow decode_disp8scale() to work as is here.
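
The benefit of switching those entries to simd_packed_fp with d8s_vl is
that the disp8 scale can then be derived generically from the EVEX vector
length (or from the element size under embedded broadcast) instead of
needing per-opcode data. A rough sketch of that mapping, using a
hypothetical helper name rather than Xen's actual decode_disp8scale()
body:

#include <stdbool.h>

/* Illustrative only: log2(N) for a d8s_vl entry, derived from EVEX.L'L
 * (lr), the broadcast bit (brs) and EVEX.W (w). */
static unsigned int vl_disp8scale(unsigned int lr, bool brs, bool w)
{
    if ( brs )          /* embedded broadcast: N is the element size */
        return 2 + w;   /* 4 or 8 bytes */
    return 4 + lr;      /* full vector: 16, 32 or 64 bytes */
}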

The placement of the 0xe6 case block, while wrong at this point, once
again anticipates further case labels being added.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: ea.type == OP_* -> ea.type != OP_*. Re-base.
v6: Re-base over changes earlier in the series.
v4: New.

Comments

Andrew Cooper May 21, 2019, 11:37 a.m. UTC | #1
On 15/03/2019 10:47, Jan Beulich wrote:
> ... including the two AVX512DQ forms, which share the same encodings and
> differ only in having EVEX.W set.
>
> VCVTDQ2PD, sharing its main opcode with others, needs a "manual"
> override of disp8scale.
>
> The simd_size changes for the twobyte_table[] entries are benign to
> pre-existing code, but allow decode_disp8scale() to work as is here.
>
> The placement of the 0xe6 case block, while wrong at this point, once
> again anticipates further case labels being added.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Patch

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -109,8 +109,12 @@  static const struct test avx512f_all[] =
     INSN_FP(cmp,             0f, c2),
     INSN(comisd,       66,   0f, 2f,    el,      q, el),
     INSN(comiss,         ,   0f, 2f,    el,      d, el),
+    INSN(cvtdq2pd,     f3,   0f, e6,    vl_2,    d, vl),
+    INSN(cvtdq2ps,       ,   0f, 5b,    vl,      d, vl),
+    INSN(cvtpd2dq,     f2,   0f, e6,    vl,      q, vl),
     INSN(cvtpd2ps,     66,   0f, 5a,    vl,      q, vl),
     INSN(cvtph2ps,     66, 0f38, 13,    vl_2, d_nb, vl),
+    INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
     INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
     INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
     INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
@@ -398,6 +402,8 @@  static const struct test avx512dq_all[]
     INSN_PFP(and,              0f, 54),
     INSN_PFP(andn,             0f, 55),
     INSN(broadcasti32x2, 66, 0f38, 59, el_2,  d, vl),
+    INSN(cvtqq2pd,       f3,   0f, e6,   vl,  q, vl),
+    INSN(cvtqq2ps,         ,   0f, 5b,   vl,  q, vl),
     INSN_PFP(or,               0f, 56),
 //       pmovd2m,        f3, 0f38, 39,        d
 //       pmovm2,         f3, 0f38, 38,       dq
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -92,6 +92,13 @@  static inline bool _to_bool(byte_vec_t b
 # define to_int(x) ((vec_t){ (int)(x)[0] })
 #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
+#elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
+      (VEC_SIZE == 64 || defined(__AVX512VL__))
+# if FLOAT_SIZE == 4
+#  define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)
+# endif
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -1142,15 +1149,21 @@  int simd_test(void)
     touch(src);
     if ( !eq(x * -alt, -src) ) return __LINE__;
 
-# if defined(recip) && defined(to_int)
+# ifdef to_int
+
+    touch(src);
+    x = to_int(src);
+    touch(src);
+    if ( !eq(x, src) ) return __LINE__;
 
+#  ifdef recip
     touch(src);
     x = recip(src);
     touch(src);
     touch(x);
     if ( !eq(to_int(recip(x)), src) ) return __LINE__;
 
-#  ifdef rsqrt
+#   ifdef rsqrt
     x = src * src;
     touch(x);
     y = rsqrt(x);
@@ -1158,6 +1171,7 @@  int simd_test(void)
     if ( !eq(to_int(recip(y)), src) ) return __LINE__;
     touch(src);
     if ( !eq(to_int(y), to_int(recip(src))) ) return __LINE__;
+#   endif
 #  endif
 
 # endif
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -244,6 +244,7 @@  asm ( ".macro override insn    \n\t"
 OVR_INT(broadcast);
 OVR_SFP(broadcast);
 OVR_SFP(comi);
+OVR_VFP(cvtdq2);
 OVR_FP(add);
 OVR_INT(add);
 OVR_BW(adds);
@@ -330,13 +331,19 @@  REN(pandn, , d);
 REN(por, , d);
 REN(pxor, , d);
 #  endif
+OVR(cvtpd2dqx);
+OVR(cvtpd2dqy);
 OVR(cvtpd2psx);
 OVR(cvtpd2psy);
 OVR(cvtph2ps);
+OVR(cvtps2dq);
 OVR(cvtps2pd);
 OVR(cvtps2ph);
 OVR(cvtsd2ss);
 OVR(cvtss2sd);
+OVR(cvttpd2dqx);
+OVR(cvttpd2dqy);
+OVR(cvttps2dq);
 OVR(movddup);
 OVR(movntdq);
 OVR(movntdqa);
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -311,7 +311,7 @@  static const struct twobyte_table {
     [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
     [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
     [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
@@ -375,7 +375,7 @@  static const struct twobyte_table {
     [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
     [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
     [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
@@ -3081,6 +3081,11 @@  x86_decode(
                 if ( disp8scale == 2 && evex.pfx == vex_f3 )
                     disp8scale = 3;
                 break;
+
+            case 0xe6: /* vcvtdq2pd needs special casing */
+                if ( disp8scale && evex.pfx == vex_f3 && !evex.w && !evex.brs )
+                    --disp8scale;
+                break;
             }
             break;
 
@@ -6587,6 +6592,22 @@  x86_emulate(
         op_bytes = 16 << vex.l;
         goto simd_0f_cvt;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x5b): /* vcvtps2dq [xyz]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(0x0f, 0x5b): /* vcvttps2dq [xyz]mm/mem,[xyz]mm{k} */
+        generate_exception_if(evex.w, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_EVEX(0x0f, 0x5b):    /* vcvtdq2ps [xyz]mm/mem,[xyz]mm{k} */
+                                          /* vcvtqq2ps [xyz]mm/mem,{x,y}mm{k} */
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        else
+            host_and_vcpu_must_have(avx512f);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        d |= TwoOp;
+        op_bytes = 16 << evex.lr;
+        goto simd_zmm;
+
     CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
@@ -7251,6 +7272,27 @@  x86_emulate(
         op_bytes = 8;
         goto simd_0f_xmm;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe6):   /* vcvttpd2dq [xyz]mm/mem,{x,y}mm{k} */
+    case X86EMUL_OPC_EVEX_F2(0x0f, 0xe6):   /* vcvtpd2dq [xyz]mm/mem,{x,y}mm{k} */
+        generate_exception_if(!evex.w, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_EVEX_F3(0x0f, 0xe6):   /* vcvtdq2pd {x,y}mm/mem,[xyz]mm{k} */
+                                            /* vcvtqq2pd [xyz]mm/mem,[xyz]mm{k} */
+        if ( evex.pfx != vex_f3 )
+            host_and_vcpu_must_have(avx512f);
+        else if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        else
+        {
+            host_and_vcpu_must_have(avx512f);
+            generate_exception_if(ea.type != OP_MEM && evex.brs, EXC_UD);
+        }
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        d |= TwoOp;
+        op_bytes = 8 << (evex.w + evex.lr);
+        goto simd_zmm;
+
     case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu m128,xmm */
     case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);