@@ -614,12 +614,18 @@ static const struct test avx512_fp16_all
INSN(comish, , map5, 2f, el, fp16, el),
INSN(divph, , map5, 5e, vl, fp16, vl),
INSN(divsh, f3, map5, 5e, el, fp16, el),
+ INSN(fcmaddcph, f2, map6, 56, vl, d, vl),
+ INSN(fcmaddcsh, f2, map6, 57, el, d, el),
+ INSN(fcmulcph, f2, map6, d6, vl, d, vl),
+ INSN(fcmulcsh, f2, map6, d7, el, d, el),
INSN(fmadd132ph, 66, map6, 98, vl, fp16, vl),
INSN(fmadd132sh, 66, map6, 99, el, fp16, el),
INSN(fmadd213ph, 66, map6, a8, vl, fp16, vl),
INSN(fmadd213sh, 66, map6, a9, el, fp16, el),
INSN(fmadd231ph, 66, map6, b8, vl, fp16, vl),
INSN(fmadd231sh, 66, map6, b9, el, fp16, el),
+ INSN(fmaddcph, f3, map6, 56, vl, d, vl),
+ INSN(fmaddcsh, f3, map6, 57, el, d, el),
INSN(fmaddsub132ph, 66, map6, 96, vl, fp16, vl),
INSN(fmaddsub213ph, 66, map6, a6, vl, fp16, vl),
INSN(fmaddsub231ph, 66, map6, b6, vl, fp16, vl),
@@ -632,6 +638,8 @@ static const struct test avx512_fp16_all
INSN(fmsubadd132ph, 66, map6, 97, vl, fp16, vl),
INSN(fmsubadd213ph, 66, map6, a7, vl, fp16, vl),
INSN(fmsubadd231ph, 66, map6, b7, vl, fp16, vl),
+ INSN(fmulcph, f3, map6, d6, vl, d, vl),
+ INSN(fmulcsh, f3, map6, d7, el, d, el),
INSN(fnmadd132ph, 66, map6, 9c, vl, fp16, vl),
INSN(fnmadd132sh, 66, map6, 9d, el, fp16, el),
INSN(fnmadd213ph, 66, map6, ac, vl, fp16, vl),
@@ -2058,6 +2058,10 @@ static const struct evex {
{ { 0x4d }, 2, T, R, pfx_66, W0, LIG }, /* vrcpsh */
{ { 0x4e }, 2, T, R, pfx_66, W0, Ln }, /* vrsqrtph */
{ { 0x4f }, 2, T, R, pfx_66, W0, LIG }, /* vrsqrtsh */
+ { { 0x56 }, 2, T, R, pfx_f3, W0, Ln }, /* vfmaddcph */
+ { { 0x56 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmaddcph */
+ { { 0x57 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmaddcsh */
+ { { 0x57 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmaddcsh */
{ { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
{ { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
{ { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
@@ -2088,6 +2092,10 @@ static const struct evex {
{ { 0xbd }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd231sh */
{ { 0xbe }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub231ph */
{ { 0xbf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub231sh */
+ { { 0xd6 }, 2, T, R, pfx_f3, W0, Ln }, /* vfmulcph */
+ { { 0xd6 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmulcph */
+ { { 0xd7 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmulcsh */
+ { { 0xd7 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmulcsh */
};

static const struct {
@@ -379,6 +379,8 @@ static const struct ext0f38_table {
[0x4f] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0x50 ... 0x53] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x54 ... 0x55] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
+ [0x56] = { .simd_size = simd_other, .d8s = d8s_vl },
+ [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0x58] = { .simd_size = simd_other, .two_op = 1, .d8s = 2 },
[0x59] = { .simd_size = simd_other, .two_op = 1, .d8s = 3 },
[0x5a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
@@ -441,6 +443,8 @@ static const struct ext0f38_table {
[0xcc] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
[0xcd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+ [0xd6] = { .simd_size = simd_other, .d8s = d8s_vl },
+ [0xd7] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
[0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0xf0] = { .two_op = 1 },
@@ -1502,6 +1506,10 @@ int x86emul_decode(struct x86_emulate_st
if ( s->evex.pfx == vex_66 )
s->fp16 = true;
break;
+
+ case 0x56: case 0x57: /* vf{,c}maddc{p,s}h */
+ case 0xd6: case 0xd7: /* vf{,c}mulc{p,s}h */
+ break;
}

disp8scale = decode_disp8scale(ext0f38_table[b].d8s, s);
@@ -7840,6 +7840,34 @@ x86_emulate(
avx512_vlen_check(true);
goto simd_zmm;

+ case X86EMUL_OPC_EVEX_F3(6, 0x56): /* vfmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F2(6, 0x56): /* vfcmaddcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F3(6, 0xd6): /* vfmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F2(6, 0xd6): /* vfcmulcph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ op_bytes = 16 << evex.lr;
+ /* fall through */
+ case X86EMUL_OPC_EVEX_F3(6, 0x57): /* vfmaddcsh xmm/m32,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX_F2(6, 0x57): /* vfcmaddcsh xmm/m32,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX_F3(6, 0xd7): /* vfmulcsh xmm/m32,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX_F2(6, 0xd7): /* vfcmulcsh xmm/m32,xmm,xmm{k} */
+ {
+ unsigned int src1 = ~evex.reg;
+
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w || ((b & 1) && ea.type != OP_REG && evex.brs),
+ EXC_UD);
+ if ( mode_64bit() )
+ src1 = (src1 & 0xf) | (!evex.RX << 4);
+ else
+ src1 &= 7;
+ generate_exception_if(modrm_reg == src1 ||
+ (ea.type != OP_MEM && modrm_reg == modrm_rm),
+ EXC_UD);
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(b & 1);
+ goto simd_zmm;
+ }
+
case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
Aspects to consider are that these insns use a 32-bit element size (pairs of FP16 values) and that the destination register is not permitted to overlap either source register.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
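For illustration only (not part of the patch): a minimal C sketch of what a single 32-bit lane, i.e. one real/imaginary FP16 pair, of vf{,c}mulc{p,s}h computes. float stands in for FP16, and per-step rounding, masking, and broadcast are ignored; the names c16 and fmulc are purely illustrative. The conjugation shown follows the Intel intrinsics' description (a * conj(b)); consult the SDM for the exact assembler operand mapping.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for one 32-bit lane: a real/imaginary pair of FP16 values. */
struct c16 { float re, im; };

/*
 * Reference model of one lane of vfmulcph (conj == false) resp. vfcmulcph
 * (conj == true).  The FMA forms additionally add the accumulator lane.
 */
static struct c16 fmulc(struct c16 a, struct c16 b, bool conj)
{
    struct c16 r;

    if ( conj )                 /* multiply by the complex conjugate of b */
        b.im = -b.im;

    r.re = a.re * b.re - a.im * b.im;
    r.im = a.re * b.im + a.im * b.re;

    return r;
}

int main(void)
{
    struct c16 a = { 1, 2 }, b = { 3, 4 };
    struct c16 m = fmulc(a, b, false);  /* (1+2i)*(3+4i)     = -5+10i */
    struct c16 c = fmulc(a, b, true);   /* (1+2i)*conj(3+4i) = 11+2i  */

    printf("mul:  %g%+gi\n", m.re, m.im);
    printf("cmul: %g%+gi\n", c.re, c.im);
    return 0;
}

The register restriction mentioned above is what the new modrm_reg == src1 and modrm_reg == modrm_rm checks in x86_emulate() enforce: the destination has to be distinct from both sources, with #UD raised otherwise.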