@@ -117,8 +117,16 @@ static const struct test avx512f_all[] =
INSN(cvtps2dq, 66, 0f, 5b, vl, d, vl),
INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl),
INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl),
+ INSN(cvtsd2si, f2, 0f, 2d, el, q, el),
INSN(cvtsd2ss, f2, 0f, 5a, el, q, el),
+ INSN(cvtsi2sd, f2, 0f, 2a, el, dq64, el),
+ INSN(cvtsi2ss, f3, 0f, 2a, el, dq64, el),
INSN(cvtss2sd, f3, 0f, 5a, el, d, el),
+ INSN(cvtss2si, f3, 0f, 2d, el, d, el),
+ INSN(cvttpd2dq, 66, 0f, e6, vl, q, vl),
+ INSN(cvttps2dq, f3, 0f, 5b, vl, d, vl),
+ INSN(cvttsd2si, f2, 0f, 2c, el, q, el),
+ INSN(cvttss2si, f3, 0f, 2c, el, d, el),
INSN_FP(div, 0f, 5e),
INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
INSN(fmadd132, 66, 0f38, 99, el, sd, el),
@@ -746,8 +754,9 @@ static void test_group(const struct test
break;
case ESZ_dq:
- test_pair(&tests[i], vl[j], ESZ_d, "d", ESZ_q, "q",
- instr, ctxt);
+ test_pair(&tests[i], vl[j], ESZ_d,
+ strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
+ ESZ_q, "q", instr, ctxt);
break;
#ifdef __i386__
@@ -89,7 +89,7 @@ static inline bool _to_bool(byte_vec_t b
#endif
#if VEC_SIZE == FLOAT_SIZE
-# define to_int(x) ((vec_t){ (int)(x)[0] })
+# define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); })
#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
#elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
@@ -340,10 +340,28 @@ OVR(cvtps2dq);
OVR(cvtps2pd);
OVR(cvtps2ph);
OVR(cvtsd2ss);
+OVR(cvtsd2si);
+OVR(cvtsd2sil);
+OVR(cvtsd2siq);
+OVR(cvtsi2sd);
+OVR(cvtsi2sdl);
+OVR(cvtsi2sdq);
+OVR(cvtsi2ss);
+OVR(cvtsi2ssl);
+OVR(cvtsi2ssq);
OVR(cvtss2sd);
+OVR(cvtss2si);
+OVR(cvtss2sil);
+OVR(cvtss2siq);
OVR(cvttpd2dqx);
OVR(cvttpd2dqy);
OVR(cvttps2dq);
+OVR(cvttsd2si);
+OVR(cvttsd2sil);
+OVR(cvttsd2siq);
+OVR(cvttss2si);
+OVR(cvttss2sil);
+OVR(cvttss2siq);
OVR(movddup);
OVR(movntdq);
OVR(movntdqa);
@@ -296,7 +296,7 @@ static const struct twobyte_table {
[0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
[0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
[0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
- [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+ [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
[0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
[0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp, simd_none, d8s_dq },
@@ -3072,6 +3072,12 @@ x86_decode(
modrm_mod = 3;
break;
+ case 0x2c: /* vcvtts{s,d}2si need special casing */
+ case 0x2d: /* vcvts{s,d}2si need special casing */
+ if ( evex_encoded() )
+ disp8scale = 2 + (evex.pfx & VEX_PREFIX_DOUBLE_MASK);
+ break;
+
case 0x5a: /* vcvtps2pd needs special casing */
if ( disp8scale && !evex.pfx && !evex.brs )
--disp8scale;
@@ -6199,6 +6205,48 @@ x86_emulate(
state->simd_size = simd_none;
goto simd_0f_rm;
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
+ generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ if ( !evex.brs )
+ avx512_vlen_check(true);
+ get_fpu(X86EMUL_FPU_zmm);
+
+ if ( ea.type == OP_MEM )
+ {
+ rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
+ rex_prefix & REX_W ? 8 : 4, ctxt, ops);
+ if ( rc != X86EMUL_OKAY )
+ goto done;
+ }
+ else
+ src.val = *ea.reg;
+
+ opc = init_evex(stub);
+ opc[0] = b;
+ /* Convert memory/GPR source to %rAX. */
+ evex.b = 1;
+ if ( !mode_64bit() )
+ evex.w = 0;
+ /*
+ * SDM version 067 claims that exception type E10NF implies #UD when
+ * EVEX.L'L is non-zero for 32-bit VCVT{,U}SI2SD. Experimentally this
+ * cannot be confirmed, but be on the safe side for the stub.
+ */
+ if ( !evex.w && evex.pfx == vex_f2 )
+ evex.lr = 0;
+ opc[1] = (modrm & 0x38) | 0xc0;
+ insn_bytes = EVEX_PFX_BYTES + 2;
+ opc[2] = 0xc3;
+
+ copy_EVEX(opc, evex);
+ invoke_stub("", "", "=g" (dummy) : "a" (src.val));
+
+ put_stub(stub);
+ state->simd_size = simd_none;
+ break;
+
CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c): /* cvtts{s,d}2si xmm/mem,reg */
CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d): /* cvts{s,d}2si xmm/mem,reg */
@@ -6222,14 +6270,17 @@ x86_emulate(
}
opc = init_prefixes(stub);
+ cvts_2si:
opc[0] = b;
/* Convert GPR destination to %rAX and memory operand to (%rCX). */
rex_prefix &= ~REX_R;
vex.r = 1;
+ evex.r = 1;
if ( ea.type == OP_MEM )
{
rex_prefix &= ~REX_B;
vex.b = 1;
+ evex.b = 1;
opc[1] = 0x01;
rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
@@ -6240,11 +6291,22 @@ x86_emulate(
else
opc[1] = modrm & 0xc7;
if ( !mode_64bit() )
+ {
vex.w = 0;
- insn_bytes = PFX_BYTES + 2;
+ evex.w = 0;
+ }
+ if ( evex_encoded() )
+ {
+ insn_bytes = EVEX_PFX_BYTES + 2;
+ copy_EVEX(opc, evex);
+ }
+ else
+ {
+ insn_bytes = PFX_BYTES + 2;
+ copy_REX_VEX(opc, rex_prefix, vex);
+ }
opc[2] = 0xc3;
- copy_REX_VEX(opc, rex_prefix, vex);
ea.reg = decode_gpr(&_regs, modrm_reg);
invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
@@ -6252,6 +6314,18 @@ x86_emulate(
state->simd_size = simd_none;
break;
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+ generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
+ (ea.type != OP_REG && evex.brs)),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ if ( !evex.brs )
+ avx512_vlen_check(true);
+ get_fpu(X86EMUL_FPU_zmm);
+ opc = init_evex(stub);
+ goto cvts_2si;
+
CASE_SIMD_PACKED_FP(, 0x0f, 0x2e): /* ucomis{s,d} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(, 0x0f, 0x2f): /* comis{s,d} xmm/mem,xmm */
VCVT{,T}S{S,D}2SI use EVEX.W for their destination (register) rather than
their (possibly memory) source operand size and hence need a "manual"
override of disp8scale.

While the SDM claims that EVEX.L'L needs to be zero for the 32-bit forms
of VCVT{,U}SI2SD (exception type E10NF), observations on my test system do
not confirm this (and I've got informal confirmation that this is a doc
mistake). Nevertheless, to be on the safe side, force evex.lr to zero in
this case when constructing the stub.

Slightly adjust the scalar to_int() in the test harness, to increase the
chances of the operand ending up in memory.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Fix VCVTSI2SS - cannot re-use VMOV{D,Q} code here, as the register
    form can't be converted to a memory one when embedded rounding is in
    effect. Force evex.lr to zero for 32-bit VCVTSI2SD. Permit embedded
    rounding for VCVT{,T}S{S,D}2SI. Re-base.
v4: New.
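As a side note on the disp8scale override in the 0x2c/0x2d decode hunk: the
standalone sketch below (not Xen code; the macro and helper names are made up
for illustration) shows the Disp8*N effect that the hard-coded
"2 + (evex.pfx & VEX_PREFIX_DOUBLE_MASK)" achieves, i.e. the compressed 8-bit
displacement scales with the scalar source element size (4 bytes for the SS
forms, 8 for the SD forms) rather than with EVEX.W.

#include <stdint.h>
#include <stdio.h>

#define PFX_F3 2           /* assumed encoding of the F3 prefix field */
#define PFX_F2 3           /* assumed encoding of the F2 prefix field */
#define PFX_DOUBLE_MASK 1  /* bit set for the "double" (66/F2) prefixes */

/* Hypothetical helper: expand a compressed disp8 the way the stub would. */
static int64_t effective_disp(int8_t disp8, unsigned int pfx)
{
    /* 2 -> multiply by 4 (SS forms), 3 -> multiply by 8 (SD forms). */
    unsigned int disp8scale = 2 + (pfx & PFX_DOUBLE_MASK);

    return (int64_t)disp8 * (int64_t)(1u << disp8scale);
}

int main(void)
{
    /* vcvtsd2si 0x40(%rax),%rcx can encode its displacement as disp8 = 8. */
    printf("SD form: disp8=8  -> %+lld bytes\n",
           (long long)effective_disp(8, PFX_F2));
    /* vcvtss2si 0x40(%rax),%ecx needs disp8 = 16 for the same offset. */
    printf("SS form: disp8=16 -> %+lld bytes\n",
           (long long)effective_disp(16, PFX_F3));
    return 0;
}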