@@ -1001,7 +1001,11 @@ static void test_group(const struct test
for ( j = 0; j < nr_vl; ++j )
{
if ( vl[0] == VL_512 && vl[j] != VL_512 &&
- !cpu_policy.feat.avx512vl )
+ !cpu_policy.feat.avx512vl && !cpu_policy.feat.avx10 )
+ continue;
+
+ if ( vl[j] == VL_512 && !cpu_policy.feat.avx512f &&
+ !cpu_policy.avx10.vsz512 )
continue;
 
switch ( tests[i].esz )
@@ -1052,6 +1056,27 @@ static void test_group(const struct test
}
}
 
+/* AVX512 (sub)features implied by AVX10. */
+#define avx10_has_avx512f true
+#define avx10_has_avx512bw true
+#define avx10_has_avx512cd true
+#define avx10_has_avx512dq true
+#define avx10_has_avx512_bf16 true
+#define avx10_has_avx512_bitalg true
+#define avx10_has_avx512_fp16 true
+#define avx10_has_avx512_ifma true
+#define avx10_has_avx512_vbmi true
+#define avx10_has_avx512_vbmi2 true
+#define avx10_has_avx512_vnni true
+#define avx10_has_avx512_vpopcntdq true
+
+/* AVX512 sub-features /not/ implied by AVX10. */
+#define avx10_has_avx512er false
+#define avx10_has_avx512pf false
+#define avx10_has_avx512_4fmaps false
+#define avx10_has_avx512_4vnniw false
+#define avx10_has_avx512_vp2intersect false
+
void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
const struct x86_emulate_ops *ops)
{
@@ -1059,8 +1084,8 @@ void evex_disp8_test(void *instr, struct
emulops.read = read;
emulops.write = write;
 
-#define RUN(feat, vl) do { \
- if ( cpu_has_##feat ) \
+#define run(cond, feat, vl) do { \
+ if ( cond ) \
{ \
printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \
test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \
@@ -1069,6 +1094,12 @@ void evex_disp8_test(void *instr, struct
} \
} while ( false )
 
+#define RUN(feat, vl) \
+ run(cpu_has_ ## feat || \
+ (cpu_has_avx10_1 && cpu_policy.avx10.vsz256 && avx10_has_ ## feat && \
+ (ARRAY_SIZE(vl_ ## vl) > 1 || &vl_ ## vl[0] != &vl_512[0])), \
+ feat, vl)
+
RUN(avx512f, all);
RUN(avx512f, 128);
RUN(avx512f, no128);
@@ -1091,10 +1122,15 @@ void evex_disp8_test(void *instr, struct
RUN(avx512_fp16, all);
RUN(avx512_fp16, 128);
 
- if ( cpu_has_avx512f )
+#undef RUN
+
+ if ( cpu_has_avx512f || cpu_has_avx10_1 )
{
+#define RUN(feat, vl) run(cpu_has_ ## feat, feat, vl)
RUN(gfni, all);
RUN(vaes, all);
RUN(vpclmulqdq, all);
+#undef RUN
}
+#undef run
}
@@ -4,7 +4,27 @@ include $(XEN_ROOT)/tools/Rules.mk
 
$(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 
-CFLAGS += -fno-builtin -g0 $($(TESTCASE)-cflags)
+ifneq ($(filter -mavx512%,$($(TESTCASE)-cflags)),)
+
+cflags-vsz64 :=
+cflags-vsz32 := -mprefer-vector-width=256
+cflags-vsz16 := -mprefer-vector-width=128
+# Scalar tests don't set VEC_SIZE (and VEC_MAX is used by S/G ones only)
+cflags-vsz := -mprefer-vector-width=128
+
+ifneq ($(filter -DVEC_SIZE=%,$($(TESTCASE)-cflags)),)
+CFLAGS-VSZ := $(cflags-vsz$(patsubst -DVEC_SIZE=%,%,$(filter -DVEC_SIZE=%,$($(TESTCASE)-cflags))))
+else
+CFLAGS-VSZ := $(cflags-vsz$(patsubst -DVEC_MAX=%,%,$(filter -DVEC_MAX=%,$($(TESTCASE)-cflags))))
+endif
+
+else
+
+CFLAGS-VSZ :=
+
+endif
+
+CFLAGS += -fno-builtin -g0 $($(TESTCASE)-cflags) $(CFLAGS-VSZ)
 
LDFLAGS_DIRECT += $(shell { $(LD) -v --warn-rwx-segments; } >/dev/null 2>&1 && echo --no-warn-rwx-segments)
 
@@ -124,52 +124,61 @@ static bool simd_check_avx_pclmul(void)
 
static bool simd_check_avx512f(void)
{
- return cpu_has_avx512f;
+ return cpu_has_avx512f || cpu_has_avx10_1_512;
}
-#define simd_check_avx512f_opmask simd_check_avx512f
#define simd_check_avx512f_sg simd_check_avx512f
 
+static bool simd_check_avx512f_sc(void)
+{
+ return cpu_has_avx512f || cpu_has_avx10_1;
+}
+#define simd_check_avx512f_opmask simd_check_avx512f_sc
+
static bool simd_check_avx512f_vl(void)
{
- return cpu_has_avx512f && cpu_policy.feat.avx512vl;
+ return (cpu_has_avx512f && cpu_policy.feat.avx512vl) ||
+ cpu_has_avx10_1_256;
}
#define simd_check_avx512vl_sg simd_check_avx512f_vl
 
static bool simd_check_avx512dq(void)
{
- return cpu_has_avx512dq;
+ return cpu_has_avx512dq || cpu_has_avx10_1_512;
}
#define simd_check_avx512dq_opmask simd_check_avx512dq
 
static bool simd_check_avx512dq_vl(void)
{
- return cpu_has_avx512dq && cpu_policy.feat.avx512vl;
+ return (cpu_has_avx512dq && cpu_policy.feat.avx512vl) ||
+ cpu_has_avx10_1_256;
}
 
static bool simd_check_avx512bw(void)
{
- return cpu_has_avx512bw;
+ return cpu_has_avx512bw || cpu_has_avx10_1_512;
}
#define simd_check_avx512bw_opmask simd_check_avx512bw
 
static bool simd_check_avx512bw_vl(void)
{
- return cpu_has_avx512bw && cpu_policy.feat.avx512vl;
+ return (cpu_has_avx512bw && cpu_policy.feat.avx512vl) ||
+ cpu_has_avx10_1_256;
}
 
static bool simd_check_avx512vbmi(void)
{
- return cpu_has_avx512_vbmi;
+ return cpu_has_avx512_vbmi || cpu_has_avx10_1_512;
}
 
static bool simd_check_avx512vbmi_vl(void)
{
- return cpu_has_avx512_vbmi && cpu_policy.feat.avx512vl;
+ return (cpu_has_avx512_vbmi && cpu_policy.feat.avx512vl) ||
+ cpu_has_avx10_1_256;
}
 
static bool simd_check_avx512vbmi2(void)
{
- return cpu_has_avx512_vbmi2;
+ return cpu_has_avx512_vbmi2 || cpu_has_avx10_1_512;
}
 
static bool simd_check_sse4_sha(void)
@@ -250,17 +259,23 @@ static bool simd_check_avx512bw_gf_vl(vo
 
static bool simd_check_avx512vnni(void)
{
- return cpu_has_avx512_vnni;
+ return cpu_has_avx512_vnni || cpu_has_avx10_1_512;
}
 
static bool simd_check_avx512fp16(void)
{
- return cpu_has_avx512_fp16;
+ return cpu_has_avx512_fp16 || cpu_has_avx10_1_512;
+}
+
+static bool simd_check_avx512fp16_sc(void)
+{
+ return cpu_has_avx512_fp16 || cpu_has_avx10_1;
}
 
static bool simd_check_avx512fp16_vl(void)
{
- return cpu_has_avx512_fp16 && cpu_policy.feat.avx512vl;
+ return (cpu_has_avx512_fp16 && cpu_policy.feat.avx512vl) ||
+ cpu_has_avx10_1_256;
}
 
static void simd_set_regs(struct cpu_user_regs *regs)
@@ -433,9 +448,13 @@ static const struct {
SIMD(OPMASK+DQ/w, avx512dq_opmask, 2),
SIMD(OPMASK+BW/d, avx512bw_opmask, 4),
SIMD(OPMASK+BW/q, avx512bw_opmask, 8),
- SIMD(AVX512F f32 scalar, avx512f, f4),
+#define avx512f_sc_x86_32_D_f4 avx512f_x86_32_D_f4
+#define avx512f_sc_x86_64_D_f4 avx512f_x86_64_D_f4
+ SIMD(AVX512F f32 scalar, avx512f_sc, f4),
SIMD(AVX512F f32x16, avx512f, 64f4),
- SIMD(AVX512F f64 scalar, avx512f, f8),
+#define avx512f_sc_x86_32_D_f8 avx512f_x86_32_D_f8
+#define avx512f_sc_x86_64_D_f8 avx512f_x86_64_D_f8
+ SIMD(AVX512F f64 scalar, avx512f_sc, f8),
SIMD(AVX512F f64x8, avx512f, 64f8),
SIMD(AVX512F s32x16, avx512f, 64i4),
SIMD(AVX512F u32x16, avx512f, 64u4),
@@ -523,7 +542,9 @@ static const struct {
AVX512VL(_VBMI+VL u16x8, avx512vbmi, 16u2),
AVX512VL(_VBMI+VL s16x16, avx512vbmi, 32i2),
AVX512VL(_VBMI+VL u16x16, avx512vbmi, 32u2),
- SIMD(AVX512_FP16 f16 scal,avx512fp16, f2),
+#define avx512fp16_sc_x86_32_D_f2 avx512fp16_x86_32_D_f2
+#define avx512fp16_sc_x86_64_D_f2 avx512fp16_x86_64_D_f2
+ SIMD(AVX512_FP16 f16 scal,avx512fp16_sc, f2),
SIMD(AVX512_FP16 f16x32, avx512fp16, 64f2),
AVX512VL(_FP16+VL f16x8, avx512fp16, 16f2),
AVX512VL(_FP16+VL f16x16,avx512fp16, 32f2),
@@ -2810,7 +2831,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq %xmm1,32(%edx)...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovq_to_mem);
 
@@ -2834,7 +2855,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq 32(%edx),%xmm0...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovq_from_mem);
 
@@ -2846,11 +2867,22 @@ int main(int argc, char **argv)
rc = x86_emulate(&ctxt, &emulops);
if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_from_mem) )
goto fail;
- asm ( "vmovq %1, %%xmm1\n\t"
- "vpcmpeqq %%zmm0, %%zmm1, %%k0\n"
- "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
- if ( rc != 0xff )
- goto fail;
+ if ( simd_check_avx512f() )
+ {
+ asm ( "vmovq %1, %%xmm1\n\t"
+ "vpcmpeqq %%zmm0, %%zmm1, %%k0\n"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0x00ff )
+ goto fail;
+ }
+ else
+ {
+ asm ( "vmovq %1, %%xmm1\n\t"
+ "vpcmpeqq %%xmm0, %%xmm1, %%k0\n"
+ "kmovb %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0x03 )
+ goto fail;
+ }
printf("okay\n");
}
else
@@ -3172,7 +3204,7 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vmovsd %xmm5,16(%ecx){%k3}...");
memset(res, 0x88, 128);
memset(res + 20, 0x77, 8);
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(vmovsd_masked_to_mem);
 
@@ -3390,7 +3422,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %xmm3,32(%ecx)...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovd_to_mem);
 
@@ -3415,7 +3447,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd 32(%ecx),%xmm4...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovd_from_mem);
 
@@ -3428,11 +3460,22 @@ int main(int argc, char **argv)
rc = x86_emulate(&ctxt, &emulops);
if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovd_from_mem) )
goto fail;
- asm ( "vmovd %1, %%xmm0\n\t"
- "vpcmpeqd %%zmm4, %%zmm0, %%k0\n\t"
- "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
- if ( rc != 0xffff )
- goto fail;
+ if ( simd_check_avx512f() )
+ {
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%zmm4, %%zmm0, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xffff )
+ goto fail;
+ }
+ else
+ {
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%xmm4, %%xmm0, %%k0\n\t"
+ "kmovb %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0x0f )
+ goto fail;
+ }
printf("okay\n");
}
else
@@ -3605,7 +3648,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %xmm2,%ebx...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovd_to_reg);
 
@@ -3631,7 +3674,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %ebx,%xmm1...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovd_from_reg);
 
@@ -3645,11 +3688,22 @@ int main(int argc, char **argv)
rc = x86_emulate(&ctxt, &emulops);
if ( (rc != X86EMUL_OKAY) || !check_eip(evex_vmovd_from_reg) )
goto fail;
- asm ( "vmovd %1, %%xmm0\n\t"
- "vpcmpeqd %%zmm1, %%zmm0, %%k0\n\t"
- "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
- if ( rc != 0xffff )
- goto fail;
+ if ( simd_check_avx512f() )
+ {
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%zmm1, %%zmm0, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xffff )
+ goto fail;
+ }
+ else
+ {
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%xmm1, %%xmm0, %%k0\n\t"
+ "kmovb %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0x0f )
+ goto fail;
+ }
printf("okay\n");
}
else
@@ -3733,7 +3787,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq %xmm11,32(%ecx)...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovq_to_mem2);
 
@@ -3823,7 +3877,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovq %xmm22,%rbx...");
- if ( stack_exec && simd_check_avx512f() )
+ if ( stack_exec && simd_check_avx512f_sc() )
{
decl_insn(evex_vmovq_to_reg);
 
@@ -5114,7 +5168,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5...");
- if ( stack_exec && simd_check_avx512fp16() )
+ if ( stack_exec && simd_check_avx512fp16_sc() )
{
decl_insn(vmovsh_from_mem);
decl_insn(vmovw_to_gpr);
@@ -5132,14 +5186,28 @@ int main(int argc, char **argv)
rc = x86_emulate(&ctxt, &emulops);
if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_from_mem) )
goto fail;
- asm volatile ( "kmovw %2, %%k1\n\t"
- "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t"
- "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t"
- "kmovw %%k0, %0"
- : "=g" (rc)
- : "m" (res[2]), "r" (1) );
- if ( rc != 0xffff )
- goto fail;
+ if ( simd_check_avx512fp16() )
+ {
+ asm volatile ( "kmovw %2, %%k1\n\t"
+ "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t"
+ "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t"
+ "kmovw %%k0, %0"
+ : "=g" (rc)
+ : "m" (res[2]), "r" (1) );
+ if ( rc != 0xffff )
+ goto fail;
+ }
+ else
+ {
+ asm volatile ( "kmovb %2, %%k1\n\t"
+ "vmovdqu16 %1, %%xmm4%{%%k1%}%{z%}\n\t"
+ "vpcmpeqw %%xmm4, %%xmm5, %%k0\n\t"
+ "kmovb %%k0, %0"
+ : "=g" (rc)
+ : "m" (res[2]), "r" (1) );
+ if ( rc != 0xff )
+ goto fail;
+ }
printf("okay\n");
printf("%-40s", "Testing vmovsh %xmm4,2(%eax){%k3}...");
@@ -240,7 +240,7 @@ int emul_test_get_fpu(
break;
case X86EMUL_FPU_opmask:
case X86EMUL_FPU_zmm:
- if ( cpu_has_avx512f )
+ if ( cpu_has_avx512f || cpu_has_avx10_1 )
break;
default:
return X86EMUL_UNHANDLEABLE;
@@ -181,6 +181,12 @@ void wrpkru(unsigned int val);
#define cpu_has_avx_vnni_int8 (cpu_policy.feat.avx_vnni_int8 && xcr0_mask(6))
#define cpu_has_avx_ne_convert (cpu_policy.feat.avx_ne_convert && xcr0_mask(6))
#define cpu_has_avx_vnni_int16 (cpu_policy.feat.avx_vnni_int16 && xcr0_mask(6))
+/* TBD: Is bit 6 (ZMM_Hi256) really needed here? */
+#define cpu_has_avx10_1 (cpu_policy.feat.avx10 && xcr0_mask(0xe6))
+#define cpu_has_avx10_1_256 (cpu_has_avx10_1 && \
+ (cpu_policy.avx10.vsz256 || \
+ cpu_policy.avx10.vsz512))
+#define cpu_has_avx10_1_512 (cpu_has_avx10_1 && cpu_policy.avx10.vsz512)
#define cpu_has_xgetbv1 (cpu_has_xsave && cpu_policy.xstate.xgetbv1)
 
Re-use the respective AVX512 tests, by suitably adjusting the predicate
functions. This leaves test names ("Testing ... NN-bit code sequence")
somewhat misleading, but I think we can live with that.

Note that the AVX512{BW,DQ} opmask tests cannot be run as-is for the
AVX10/256 case, as they include 512-bit vector <-> opmask insn tests.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
SDE: -gnr / -gnr256
---
TBD: For AVX10.1/256 we need to somehow guarantee that the generated
blobs really don't use 512-bit insns (it's uncertain whether passing
-mprefer-vector-width= is enough). Right now, according to my testing
on SDE, this is all fine. We may need to probe for support of the new
-mno-evex512 compiler option.

The AVX512{BW,DQ} opmask tests could of course be cloned (i.e. rebuilt
another time with -mavx512vl passed) accordingly, but the coverage gain
would be pretty marginal.
---
v2: Drop SDE 9.27.0 workaround. Re-base over dropping of Xeon Phi
    support.
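
As a possible shape for the -mno-evex512 probing mentioned in the TBD
above - an untested sketch, not part of this patch: it assumes the
cc-option helper from Xen's Config.mk is usable in this Makefile, and
that combining -mno-evex512 with -mprefer-vector-width= is sufficient:

# Prefer -mno-evex512 where the compiler knows it.  cc-option expands to
# its 2nd argument when the compiler accepts the option, and to the 3rd
# (empty here) otherwise, so older compilers keep today's behaviour.
no-evex512 := $(call cc-option,$(CC),-mno-evex512,)
cflags-vsz32 := $(no-evex512) -mprefer-vector-width=256
cflags-vsz16 := $(no-evex512) -mprefer-vector-width=128
cflags-vsz   := $(no-evex512) -mprefer-vector-width=128
# cflags-vsz64 deliberately remains untouched - 512-bit tests want EVEX512.

Even with this, whether 512-bit insns reliably stay out of the generated
blobs would still need confirming on SDE.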