@@ -146,6 +146,7 @@ static const struct test avx512f_all[] =
INSN_SFP(mov, 0f, 11),
INSN_PFP_NB(mova, 0f, 28),
INSN_PFP_NB(mova, 0f, 29),
+ INSN(movddup, f2, 0f, 12, vl, q_nb, vl),
INSN(movdqa32, 66, 0f, 6f, vl, d_nb, vl),
INSN(movdqa32, 66, 0f, 7f, vl, d_nb, vl),
INSN(movdqa64, 66, 0f, 6f, vl, q_nb, vl),
@@ -157,6 +158,8 @@ static const struct test avx512f_all[] =
INSN(movntdq, 66, 0f, e7, vl, d_nb, vl),
INSN(movntdqa, 66, 0f38, 2a, vl, d_nb, vl),
INSN_PFP_NB(movnt, 0f, 2b),
+ INSN(movshdup, f3, 0f, 16, vl, d_nb, vl),
+ INSN(movsldup, f3, 0f, 12, vl, d_nb, vl),
INSN_PFP_NB(movu, 0f, 10),
INSN_PFP_NB(movu, 0f, 11),
INSN_FP(mul, 0f, 59),
@@ -694,6 +697,19 @@ static void test_group(const struct test
switch ( tests[i].esz )
{
+ case ESZ_q_nb:
+ /* The 128-bit form of VMOVDDUP needs special casing. */
+ if ( vl[j] == VL_128 && tests[i].spc == SPC_0f &&
+ tests[i].opc == 0x12 && tests[i].pfx == PFX_f2 )
+ {
+ struct test test = tests[i];
+
+ test.vsz = VSZ_el;
+ test.scale = SC_el;
+ test_one(&test, vl[j], instr, ctxt);
+ continue;
+ }
+ /* fall through */
default:
test_one(&tests[i], vl[j], instr, ctxt);
break;
@@ -326,8 +326,11 @@ REN(pandn, , d);
REN(por, , d);
REN(pxor, , d);
# endif
+OVR(movddup);
OVR(movntdq);
OVR(movntdqa);
+OVR(movshdup);
+OVR(movsldup);
OVR(pmovsxbd);
OVR(pmovsxbq);
OVR(pmovsxdq);
@@ -3048,6 +3048,15 @@ x86_decode(
switch ( b )
{
+ case 0x12: /* vmovsldup / vmovddup */
+ if ( evex.pfx == vex_f2 )
+ disp8scale = evex.lr ? 4 + evex.lr : 3;
+ /* fall through */
+ case 0x16: /* vmovshdup */
+ if ( evex.pfx == vex_f3 )
+ disp8scale = 4 + evex.lr;
+ break;
+
case 0x20: /* mov cr,reg */
case 0x21: /* mov dr,reg */
case 0x22: /* mov reg,cr */
@@ -6066,6 +6075,20 @@ x86_emulate(
host_and_vcpu_must_have(sse3);
goto simd_0f_xmm;
+ case X86EMUL_OPC_EVEX_F3(0x0f, 0x12): /* vmovsldup [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F2(0x0f, 0x12): /* vmovddup [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F3(0x0f, 0x16): /* vmovshdup [xyz]mm/mem,[xyz]mm{k} */
+ generate_exception_if((evex.brs ||
+ evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK)),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ avx512_vlen_check(false);
+ d |= TwoOp;
+ op_bytes = !(evex.pfx & VEX_PREFIX_DOUBLE_MASK) || evex.lr
+ ? 16 << evex.lr : 8;
+ fault_suppression = false;
+ goto simd_zmm;
+
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x14): /* vunpcklp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x15): /* vunpckhp{s,d} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK),
Judging from insn prefixes, these are scalar insns, but their (memory)
operands are vector ones (with the exception of 128-bit VMOVDDUP). For
this some adjustments to disp8scale calculation code are needed.

No explicit test harness additions other than the overrides, as the
compiler already makes use of the insns.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Re-base.
v6: Fix Disp8 test for VMOVDDUP when AVX512VL is unavailable.
v4: New.
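
For illustration, below is a minimal standalone sketch of the Disp8*N
scaling which the x86_decode() adjustment above implements. The helper
names (movdup_disp8scale, effective_disp) and their parameters are
hypothetical, not emulator fields; the arithmetic merely restates the
rule that the F3 forms and the wider F2 forms scale the 8-bit
displacement by the full vector width, while 128-bit VMOVDDUP scales it
by its single quadword operand.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: mirrors the Disp8*N scaling added in x86_decode().
 * "lr" is the EVEX.L'L vector length field (0/1/2 for 128/256/512 bits);
 * "pfx_f2" is non-zero for the F2 (VMOVDDUP) form.  VMOVS{L,H}DUP always
 * read a full vector (16 << lr bytes), hence a scale of 4 + lr.  VMOVDDUP
 * reads a full vector only at 256/512 bits; its 128-bit form reads a
 * single quadword, hence a scale of 3.
 */
static unsigned int movdup_disp8scale(int pfx_f2, unsigned int lr)
{
    if ( pfx_f2 )
        return lr ? 4 + lr : 3;
    return 4 + lr;
}

/* Effective memory displacement from an encoded 8-bit displacement. */
static int32_t effective_disp(int8_t disp8, unsigned int scale)
{
    return (int32_t)disp8 * (1 << scale);
}

int main(void)
{
    /* 512-bit vmovsldup: disp8 of 1 addresses the next 64-byte slot. */
    printf("%d\n", effective_disp(1, movdup_disp8scale(0, 2)));  /* 64 */
    /* 128-bit vmovddup: disp8 of -2 means -16 bytes (two quadwords). */
    printf("%d\n", effective_disp(-2, movdup_disp8scale(1, 0))); /* -16 */
    return 0;
}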