@@ -520,6 +520,17 @@ static const struct test avx512er_512[]
INSN(rsqrt28, 66, 0f38, cd, el, sd, el),
};
+static const struct test avx512pf_512[] = {
+ INSNX(gatherpf0d, 66, 0f38, c6, 1, vl, sd, el),
+ INSNX(gatherpf0q, 66, 0f38, c7, 1, vl, sd, el),
+ INSNX(gatherpf1d, 66, 0f38, c6, 2, vl, sd, el),
+ INSNX(gatherpf1q, 66, 0f38, c7, 2, vl, sd, el),
+ INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el),
+ INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el),
+ INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el),
+ INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
+};
+
static const struct test avx512_vbmi_all[] = {
INSN(permb, 66, 0f38, 8d, vl, b, vl),
INSN(permi2b, 66, 0f38, 75, vl, b, vl),
@@ -580,7 +591,7 @@ static bool record_access(enum x86_segme
static int read(enum x86_segment seg, unsigned long offset, void *p_data,
unsigned int bytes, struct x86_emulate_ctxt *ctxt)
{
- if ( !record_access(seg, offset, bytes) )
+ if ( !record_access(seg, offset, bytes + !bytes) )
return X86EMUL_UNHANDLEABLE;
memset(p_data, 0, bytes);
return X86EMUL_OKAY;
@@ -589,7 +600,7 @@ static int read(enum x86_segment seg, un
static int write(enum x86_segment seg, unsigned long offset, void *p_data,
unsigned int bytes, struct x86_emulate_ctxt *ctxt)
{
- if ( !record_access(seg, offset, bytes) )
+ if ( !record_access(seg, offset, bytes + !bytes) )
return X86EMUL_UNHANDLEABLE;
return X86EMUL_OKAY;
}
@@ -597,7 +608,7 @@ static int write(enum x86_segment seg, u
static void test_one(const struct test *test, enum vl vl,
unsigned char *instr, struct x86_emulate_ctxt *ctxt)
{
- unsigned int vsz, esz, i;
+ unsigned int vsz, esz, i, n;
int rc;
bool sg = strstr(test->mnemonic, "gather") ||
strstr(test->mnemonic, "scatter");
@@ -725,10 +736,20 @@ static void test_one(const struct test *
for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i )
if ( accessed[i] )
goto fail;
- for ( ; i < (test->scale == SC_vl ? vsz : esz) + (sg ? esz : vsz); ++i )
+
+ n = test->scale == SC_vl ? vsz : esz;
+ if ( !sg )
+ n += vsz;
+ else if ( !strstr(test->mnemonic, "pf") )
+ n += esz;
+ else
+ ++n;
+
+ for ( ; i < n; ++i )
if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w)
: 1) )
goto fail;
+
for ( ; i < ARRAY_SIZE(accessed); ++i )
if ( accessed[i] )
goto fail;
@@ -887,6 +908,8 @@ void evex_disp8_test(void *instr, struct
RUN(avx512dq, no128);
RUN(avx512dq, 512);
RUN(avx512er, 512);
+#define cpu_has_avx512pf cpu_has_avx512f
+ RUN(avx512pf, 512);
RUN(avx512_vbmi, all);
RUN(avx512_vbmi2, all);
}
@@ -73,6 +73,7 @@ bool emul_test_init(void)
*/
cp.basic.movbe = true;
cp.feat.adx = true;
+ cp.feat.avx512pf = cp.feat.avx512f;
cp.feat.rdpid = true;
cp.extd.clzero = true;
@@ -135,12 +136,14 @@ int emul_test_cpuid(
res->c |= 1U << 22;
/*
- * The emulator doesn't itself use ADCX/ADOX/RDPID, so we can always run
- * the respective tests.
+ * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
+ * insns, so we can always run the respective tests.
*/
if ( leaf == 7 && subleaf == 0 )
{
res->b |= 1U << 19;
+ if ( res->b & (1U << 16) )
+ res->b |= 1U << 26;
res->c |= 1U << 22;
}
@@ -525,6 +525,7 @@ static const struct ext0f38_table {
[0xbd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
[0xbe] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
[0xbf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+ [0xc6 ... 0xc7] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq },
[0xc8] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
[0xc9] = { .simd_size = simd_other },
[0xca] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
@@ -1903,6 +1904,7 @@ static bool vcpu_has(
#define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops)
#define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops)
#define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops)
+#define vcpu_has_avx512pf() vcpu_has( 7, EBX, 26, ctxt, ops)
#define vcpu_has_avx512er() vcpu_has( 7, EBX, 27, ctxt, ops)
#define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops)
#define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops)
@@ -9425,6 +9427,94 @@ x86_emulate(
state->simd_size = simd_none;
break;
+ }
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0xc6):
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0xc7):
+ {
+#ifndef __XEN__
+ typeof(evex) *pevex;
+ union {
+ int32_t dw[16];
+ int64_t qw[8];
+ } index;
+#endif
+
+ ASSERT(ea.type == OP_MEM);
+ generate_exception_if((!cpu_has_avx512f || !evex.opmsk || evex.brs ||
+ evex.z || evex.reg != 0xf || evex.lr != 2),
+ EXC_UD);
+
+ switch ( modrm_reg & 7 )
+ {
+ case 1: /* vgatherpf0{d,q}p{s,d} mem{k} */
+ case 2: /* vgatherpf1{d,q}p{s,d} mem{k} */
+ case 5: /* vscatterpf0{d,q}p{s,d} mem{k} */
+ case 6: /* vscatterpf1{d,q}p{s,d} mem{k} */
+ vcpu_must_have(avx512pf);
+ break;
+ default:
+ generate_exception(EXC_UD);
+ }
+
+ get_fpu(X86EMUL_FPU_zmm);
+
+#ifndef __XEN__
+ /*
+ * For the test harness perform zero byte memory accesses, such that
+ * in particular correct Disp8 scaling can be verified.
+ */
+ fail_if((modrm_reg & 4) && !ops->write);
+
+ /* Read index register. */
+ opc = init_evex(stub);
+ pevex = copy_EVEX(opc, evex);
+ pevex->opcx = vex_0f;
+ /* vmovdqu{32,64} */
+ opc[0] = 0x7f;
+ pevex->pfx = vex_f3;
+ pevex->w = b & 1;
+ /* Use (%rax) as destination and sib_index as source. */
+ pevex->b = 1;
+ opc[1] = (state->sib_index & 7) << 3;
+ pevex->r = !mode_64bit() || !(state->sib_index & 0x08);
+ pevex->R = !mode_64bit() || !(state->sib_index & 0x10);
+ pevex->RX = 1;
+ opc[2] = 0xc3;
+
+ invoke_stub("", "", "=m" (index) : "a" (&index));
+ put_stub(stub);
+
+ /* Clear untouched parts of the mask value. */
+ n = 1 << (4 - ((b & 1) | evex.w));
+ op_mask &= (1 << n) - 1;
+
+ for ( i = 0; rc == X86EMUL_OKAY && op_mask; ++i )
+ {
+ signed long idx = b & 1 ? index.qw[i] : index.dw[i];
+
+ if ( !(op_mask & (1 << i)) )
+ continue;
+
+ rc = (modrm_reg & 4
+ ? ops->write
+ : ops->read)(ea.mem.seg,
+ truncate_ea(ea.mem.off +
+ (idx << state->sib_scale)),
+ NULL, 0, ctxt);
+ if ( rc == X86EMUL_EXCEPTION )
+ {
+ /* Squash memory access related exceptions. */
+ x86_emul_reset_event(ctxt);
+ rc = X86EMUL_OKAY;
+ }
+
+ op_mask &= ~(1 << i);
+ }
+#endif
+
+ state->simd_size = simd_none;
+ break;
}
case X86EMUL_OPC(0x0f38, 0xc8): /* sha1nexte xmm/m128,xmm */
Some adjustments are necessary to the EVEX Disp8 scaling test code to account for the zero byte reads/writes, which get issued for the test harness only. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v8: #GP/#SS don't arise here. Add previously missed change to emul_test_init(). v7: Re-base. v6: New.