@@ -457,11 +457,17 @@ static const struct test avx512dq_all[]
INSN(cvttps2uqq, 66, 0f, 78, vl_2, d, vl),
INSN(cvtuqq2pd, f3, 0f, 7a, vl, q, vl),
INSN(cvtuqq2ps, f2, 0f, 7a, vl, q, vl),
+ INSN(fpclass, 66, 0f3a, 66, vl, sd, vl),
+ INSN(fpclass, 66, 0f3a, 67, el, sd, el),
INSN_PFP(or, 0f, 56),
// pmovd2m, f3, 0f38, 39, d
// pmovm2, f3, 0f38, 38, dq
// pmovq2m, f3, 0f38, 39, q
INSN(pmullq, 66, 0f38, 40, vl, q, vl),
+ INSN(range, 66, 0f3a, 50, vl, sd, vl),
+ INSN(range, 66, 0f3a, 51, el, sd, el),
+ INSN(reduce, 66, 0f3a, 56, vl, sd, vl),
+ INSN(reduce, 66, 0f3a, 57, el, sd, el),
INSN_PFP(xor, 0f, 57),
};
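Note (not part of the patch): each INSN() row here gives the mnemonic, mandatory prefix, opcode map, and opcode byte, then the vector lengths the form exists in (vl = 128/256/512, el = scalar element only), the element-size class (sd = single or double, per EVEX.W), and the disp8 scaling class the harness should observe. A minimal sketch of that scaling under the usual EVEX tuple rules (the function and parameter names are illustrative, not the harness's own):

    #include <stdbool.h>

    /* el entries scale the compressed disp8 by the element size chosen
     * via EVEX.W; vl entries scale by the full vector width (ignoring
     * embedded broadcast).
     */
    static unsigned int disp8_scale(bool scalar, bool evex_w,
                                    unsigned int vec_bytes)
    {
        return scalar ? (evex_w ? 8 : 4) : vec_bytes;
    }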
@@ -285,10 +285,18 @@ static inline vec_t movlhps(vec_t x, vec
# define broadcast_octet(x) B(broadcastf32x8_, _mask, x, undef(), ~0)
# define insert_octet(x, y, p) B(insertf32x8_, _mask, x, y, p, undef(), ~0)
# endif
+# ifdef __AVX512DQ__
+# define frac(x) B(reduceps, _mask, x, 0b00001011, undef(), ~0)
+# endif
# define getexp(x) BR(getexpps, _mask, x, undef(), ~0)
# define getmant(x) BR(getmantps, _mask, x, 0, undef(), ~0)
-# define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0)
-# define min(x, y) BR_(minps, _mask, x, y, undef(), ~0)
+# ifdef __AVX512DQ__
+# define max(x, y) BR(rangeps, _mask, x, y, 0b0101, undef(), ~0)
+# define min(x, y) BR(rangeps, _mask, x, y, 0b0100, undef(), ~0)
+# else
+# define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0)
+# define min(x, y) BR_(minps, _mask, x, y, undef(), ~0)
+# endif
# define mix(x, y) B(movaps, _mask, x, y, (0b0101010101010101 & ALL_TRUE))
# define scale(x, y) BR(scalefps, _mask, x, y, undef(), ~0)
# if VEC_SIZE == 64 && defined(__AVX512ER__)
@@ -350,10 +358,18 @@ static inline vec_t movlhps(vec_t x, vec
# define broadcast_quartet(x) B(broadcastf64x4_, , x, undef(), ~0)
# define insert_quartet(x, y, p) B(insertf64x4_, _mask, x, y, p, undef(), ~0)
# endif
+# ifdef __AVX512DQ__
+# define frac(x) B(reducepd, _mask, x, 0b00001011, undef(), ~0)
+# endif
# define getexp(x) BR(getexppd, _mask, x, undef(), ~0)
# define getmant(x) BR(getmantpd, _mask, x, 0, undef(), ~0)
-# define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0)
-# define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0)
+# ifdef __AVX512DQ__
+# define max(x, y) BR(rangepd, _mask, x, y, 0b0101, undef(), ~0)
+# define min(x, y) BR(rangepd, _mask, x, y, 0b0100, undef(), ~0)
+# else
+# define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0)
+# define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0)
+# endif
# define mix(x, y) B(movapd, _mask, x, y, 0b01010101)
# define scale(x, y) BR(scalefpd, _mask, x, y, undef(), ~0)
# if VEC_SIZE == 64 && defined(__AVX512ER__)
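Note (not part of the patch): VREDUCE subtracts from each element that element rounded to imm8[7:4] fraction bits, using the rounding mode in imm8[1:0]; 0b00001011 thus means "keep 0 fraction bits, truncate, suppress the precision exception", i.e. x - trunc(x), which is why it can stand in as frac(). VRANGE selects min or max per imm8[1:0] and applies the sign control in imm8[3:2]; 0b0100/0b0101 give min/max with the sign taken from the selected operand, close enough to vmin/vmax for these tests. The same immediates expressed through intrinsics, as a standalone illustration (compile with -mavx512dq):

    #include <immintrin.h>

    static __m512 frac512(__m512 x)
    {
        /* 0 fraction bits, truncate: x - trunc(x). */
        return _mm512_reduce_ps(x, 0b00001011);
    }

    static __m512 max512(__m512 x, __m512 y)
    {
        /* imm[1:0] = 01: pick the larger; imm[3:2] = 01: keep its sign. */
        return _mm512_range_ps(x, y, 0b0101);
    }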
@@ -3962,6 +3962,39 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+
+ printf("%-40s", "Testing vfpclasspsz $0x46,64(%edx),%k2...");
+ if ( stack_exec && cpu_has_avx512dq )
+ {
+ decl_insn(vfpclassps);
+
+ asm volatile ( put_insn(vfpclassps,
+ /* 0x46: check for +/- 0 and neg. */
+ "vfpclasspsz $0x46, 64(%0), %%k2")
+ :: "d" (NULL) );
+
+ set_insn(vfpclassps);
+ for ( i = 0; i < 3; ++i )
+ {
+ res[16 + i * 5 + 0] = 0x00000000; /* +0 */
+ res[16 + i * 5 + 1] = 0x80000000; /* -0 */
+ res[16 + i * 5 + 2] = 0x80000001; /* -DEN */
+ res[16 + i * 5 + 3] = 0xff000000; /* -FIN */
+ res[16 + i * 5 + 4] = 0x7f000000; /* +FIN */
+ }
+ res[31] = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vfpclassps) )
+ goto fail;
+ asm volatile ( "kmovw %%k2, %0" : "=g" (rc) );
+ if ( rc != 0xbdef )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
#undef decl_insn
#undef put_insn
#undef set_insn
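Note (not part of the patch): the 0xbdef expectation follows directly from the data laid out above. Lanes 0-14 cycle through {+0, -0, -DEN, -FIN, +FIN} and lane 15 holds +0; with imm8 0x46 (match +0, -0, and negatives) every lane except the +FIN ones sets its mask bit. A standalone derivation:

    #include <stdio.h>

    int main(void)
    {
        unsigned int lane, mask = 1u << 15;   /* lane 15: +0 matches */

        for ( lane = 0; lane < 15; ++lane )
            if ( lane % 5 != 4 )              /* only +FIN fails 0x46 */
                mask |= 1u << lane;

        printf("%#x\n", mask);                /* prints 0xbdef */
        return 0;
    }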
@@ -582,10 +582,16 @@ static const struct ext0f3a_table {
[0x48 ... 0x49] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+ [0x50] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+ [0x51] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0x54] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0x55] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+ [0x56] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+ [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+ [0x66] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+ [0x67] = { .simd_size = simd_scalar_vexw, .two_op = 1, .d8s = d8s_dq },
[0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
[0x6a ... 0x6b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
[0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
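Note (not part of the patch): two_op marks forms without a vvvv register operand, which is why the packed vreduce (0x56) and both vfpclass entries carry it while vrange (0x50/0x51) and the scalar vreduce (0x57), each taking a second register source, do not. d8s_vl scales the compressed disp8 by the full vector width, d8s_dq by the 4- or 8-byte element selected via EVEX.W; e.g. a disp8 of 2 on vreducesd (EVEX.W=1) denotes a 16-byte displacement.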
@@ -9696,6 +9702,10 @@ x86_emulate(
op_bytes = 4;
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x50): /* vrangep{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x56): /* vreducep{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512dq);
+ /* fall through */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x26): /* vgetmantp{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x54): /* vfixupimmp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
host_and_vcpu_must_have(avx512f);
@@ -9703,6 +9713,10 @@ x86_emulate(
avx512_vlen_check(false);
goto simd_imm8_zmm;
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x51): /* vranges{s,d} $imm8,xmm/mem,xmm,xmm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x57): /* vreduces{s,d} $imm8,xmm/mem,xmm,xmm{k} */
+ host_and_vcpu_must_have(avx512dq);
+ /* fall through */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x27): /* vgetmants{s,d} $imm8,xmm/mem,xmm,xmm{k} */
case X86EMUL_OPC_EVEX_66(0x0f3a, 0x55): /* vfixupimms{s,d} $imm8,xmm/mem,xmm,xmm{k} */
host_and_vcpu_must_have(avx512f);
@@ -9858,6 +9872,16 @@ x86_emulate(
dst.type = OP_NONE;
break;
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x66): /* vfpclassp{s,d} $imm8,[xyz]mm/mem,k{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x67): /* vfpclasss{s,d} $imm8,xmm/mem,k{k} */
+ host_and_vcpu_must_have(avx512dq);
+ generate_exception_if(!evex.r || !evex.R || evex.z, EXC_UD);
+ if ( !(b & 1) )
+ goto avx512f_imm8_no_sae;
+ generate_exception_if(evex.brs, EXC_UD);
+ avx512_vlen_check(true);
+ goto simd_imm8_zmm;
+
case X86EMUL_OPC(0x0f3a, 0xcc): /* sha1rnds4 $imm8,xmm/m128,xmm */
host_and_vcpu_must_have(sha);
op_bytes = 16;
This completes support of AVX512DQ in the insn emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: Fix vector length check for scalar insns. Re-base.
v5: New.
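Note (not part of the patch): vfpclass writes a mask register, so the new case rejects encodings whose inverted EVEX.R/R' bits are clear (mask registers cannot be extended beyond k7) as well as zeroing-masking, which is disallowed for mask destinations. The packed form then joins the existing avx512f_imm8_no_sae path, which takes care of embedded broadcast and the packed vector-length check; the scalar form instead faults on any EVEX.b use and performs the scalar (LIG) length check, i.e. the v7 fix noted in the changelog.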