diff mbox series

[v8,21/50] x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns

Message ID 5C8B83D6020000780021F208@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series x86emul: remaining AVX512 support | expand

Commit Message

Jan Beulich March 15, 2019, 10:52 a.m. UTC
VCVT{,T}S{S,D}2SI use EVEX.W for their destination (register) rather
than their (possibly memory) source operand size and hence need a
"manual" override of disp8scale.

While the SDM claims that EVEX.L'L needs to be zero for the 32-bit forms
of VCVT{,U}SI2SD (exception type E10NF), observations on my test system
do not confirm this (and I've got informal confirmation that this is a
doc mistake). Nevertheless, to be on the safe side, force evex.lr to be
zero in this case though when constructing the stub.

Slightly adjust the scalar to_int() in the test harness, to increase the
chances of the operand ending up in memory.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Fix VCVTSI2SS - cannot re-use VMOV{D,Q} code here, as the register
    form can't be converted to a memory one when embedded rounding is in
    effect. Force evex.lr to zero for 32-bit VCVTSI2SD. Permit embedded
    rounding for VCVT{,T}S{S,D}2SI. Re-base.
v4: New.

Comments

Andrew Cooper May 21, 2019, 11:44 a.m. UTC | #1
On 15/03/2019 10:52, Jan Beulich wrote:
> VCVT{,T}S{S,D}2SI use EVEX.W for their destination (register) rather
> than their (possibly memory) source operand size and hence need a
> "manual" override of disp8scale.
>
> While the SDM claims that EVEX.L'L needs to be zero for the 32-bit forms
> of VCVT{,U}SI2SD (exception type E10NF), observations on my test system
> do not confirm this (and I've got informal confirmation that this is a
> doc mistake). Nevertheless, to be on the safe side, force evex.lr to be
> zero in this case though when constructing the stub.
>
> Slightly adjust the scalar to_int() in the test harness, to increase the
> chances of the operand ending up in memory.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff mbox series

Patch

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -117,8 +117,16 @@  static const struct test avx512f_all[] =
     INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
     INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
     INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
+    INSN(cvtsd2si,     f2,   0f, 2d,    el,      q, el),
     INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
+    INSN(cvtsi2sd,     f2,   0f, 2a,    el,   dq64, el),
+    INSN(cvtsi2ss,     f3,   0f, 2a,    el,   dq64, el),
     INSN(cvtss2sd,     f3,   0f, 5a,    el,      d, el),
+    INSN(cvtss2si,     f3,   0f, 2d,    el,      d, el),
+    INSN(cvttpd2dq,    66,   0f, e6,    vl,      q, vl),
+    INSN(cvttps2dq,    f3,   0f, 5b,    vl,      d, vl),
+    INSN(cvttsd2si,    f2,   0f, 2c,    el,      q, el),
+    INSN(cvttss2si,    f3,   0f, 2c,    el,      d, el),
     INSN_FP(div,             0f, 5e),
     INSN(fmadd132,     66, 0f38, 98,    vl,     sd, vl),
     INSN(fmadd132,     66, 0f38, 99,    el,     sd, el),
@@ -746,8 +754,9 @@  static void test_group(const struct test
                 break;
 
             case ESZ_dq:
-                test_pair(&tests[i], vl[j], ESZ_d, "d", ESZ_q, "q",
-                          instr, ctxt);
+                test_pair(&tests[i], vl[j], ESZ_d,
+                          strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
+                          ESZ_q, "q", instr, ctxt);
                 break;
 
 #ifdef __i386__
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -89,7 +89,7 @@  static inline bool _to_bool(byte_vec_t b
 #endif
 
 #if VEC_SIZE == FLOAT_SIZE
-# define to_int(x) ((vec_t){ (int)(x)[0] })
+# define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); })
 #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -340,10 +340,28 @@  OVR(cvtps2dq);
 OVR(cvtps2pd);
 OVR(cvtps2ph);
 OVR(cvtsd2ss);
+OVR(cvtsd2si);
+OVR(cvtsd2sil);
+OVR(cvtsd2siq);
+OVR(cvtsi2sd);
+OVR(cvtsi2sdl);
+OVR(cvtsi2sdq);
+OVR(cvtsi2ss);
+OVR(cvtsi2ssl);
+OVR(cvtsi2ssq);
 OVR(cvtss2sd);
+OVR(cvtss2si);
+OVR(cvtss2sil);
+OVR(cvtss2siq);
 OVR(cvttpd2dqx);
 OVR(cvttpd2dqy);
 OVR(cvttps2dq);
+OVR(cvttsd2si);
+OVR(cvttsd2sil);
+OVR(cvttsd2siq);
+OVR(cvttss2si);
+OVR(cvttss2sil);
+OVR(cvttss2siq);
 OVR(movddup);
 OVR(movntdq);
 OVR(movntdqa);
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -296,7 +296,7 @@  static const struct twobyte_table {
     [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
     [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
-    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
     [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
     [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp, simd_none, d8s_dq },
@@ -3072,6 +3072,12 @@  x86_decode(
                 modrm_mod = 3;
                 break;
 
+            case 0x2c: /* vcvtts{s,d}2si need special casing */
+            case 0x2d: /* vcvts{s,d}2si need special casing */
+                if ( evex_encoded() )
+                    disp8scale = 2 + (evex.pfx & VEX_PREFIX_DOUBLE_MASK);
+                break;
+
             case 0x5a: /* vcvtps2pd needs special casing */
                 if ( disp8scale && !evex.pfx && !evex.brs )
                     --disp8scale;
@@ -6199,6 +6205,48 @@  x86_emulate(
         state->simd_size = simd_none;
         goto simd_0f_rm;
 
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
+        generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
+                              EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( !evex.brs )
+            avx512_vlen_check(true);
+        get_fpu(X86EMUL_FPU_zmm);
+
+        if ( ea.type == OP_MEM )
+        {
+            rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
+                            rex_prefix & REX_W ? 8 : 4, ctxt, ops);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+        }
+        else
+            src.val = *ea.reg;
+
+        opc = init_evex(stub);
+        opc[0] = b;
+        /* Convert memory/GPR source to %rAX. */
+        evex.b = 1;
+        if ( !mode_64bit() )
+            evex.w = 0;
+        /*
+         * SDM version 067 claims that exception type E10NF implies #UD when
+         * EVEX.L'L is non-zero for 32-bit VCVT{,U}SI2SD. Experimentally this
+         * cannot be confirmed, but be on the safe side for the stub.
+         */
+        if ( !evex.w && evex.pfx == vex_f2 )
+            evex.lr = 0;
+        opc[1] = (modrm & 0x38) | 0xc0;
+        insn_bytes = EVEX_PFX_BYTES + 2;
+        opc[2] = 0xc3;
+
+        copy_EVEX(opc, evex);
+        invoke_stub("", "", "=g" (dummy) : "a" (src.val));
+
+        put_stub(stub);
+        state->simd_size = simd_none;
+        break;
+
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c):     /* cvtts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d):     /* cvts{s,d}2si xmm/mem,reg */
@@ -6222,14 +6270,17 @@  x86_emulate(
         }
 
         opc = init_prefixes(stub);
+    cvts_2si:
         opc[0] = b;
         /* Convert GPR destination to %rAX and memory operand to (%rCX). */
         rex_prefix &= ~REX_R;
         vex.r = 1;
+        evex.r = 1;
         if ( ea.type == OP_MEM )
         {
             rex_prefix &= ~REX_B;
             vex.b = 1;
+            evex.b = 1;
             opc[1] = 0x01;
 
             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
@@ -6240,11 +6291,22 @@  x86_emulate(
         else
             opc[1] = modrm & 0xc7;
         if ( !mode_64bit() )
+        {
             vex.w = 0;
-        insn_bytes = PFX_BYTES + 2;
+            evex.w = 0;
+        }
+        if ( evex_encoded() )
+        {
+            insn_bytes = EVEX_PFX_BYTES + 2;
+            copy_EVEX(opc, evex);
+        }
+        else
+        {
+            insn_bytes = PFX_BYTES + 2;
+            copy_REX_VEX(opc, rex_prefix, vex);
+        }
         opc[2] = 0xc3;
 
-        copy_REX_VEX(opc, rex_prefix, vex);
         ea.reg = decode_gpr(&_regs, modrm_reg);
         invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
 
@@ -6252,6 +6314,18 @@  x86_emulate(
         state->simd_size = simd_none;
         break;
 
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
+    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+        generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
+                               (ea.type != OP_REG && evex.brs)),
+                              EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( !evex.brs )
+            avx512_vlen_check(true);
+        get_fpu(X86EMUL_FPU_zmm);
+        opc = init_evex(stub);
+        goto cvts_2si;
+
     CASE_SIMD_PACKED_FP(, 0x0f, 0x2e):     /* ucomis{s,d} xmm/mem,xmm */
     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
     CASE_SIMD_PACKED_FP(, 0x0f, 0x2f):     /* comis{s,d} xmm/mem,xmm */