@@ -167,6 +167,11 @@ static bool simd_check_avx512vbmi_vl(voi
return cpu_has_avx512_vbmi && cpu_has_avx512vl;
}
+static bool simd_check_avx512vbmi2(void)
+{
+ return cpu_has_avx512_vbmi2;
+}
+
static bool simd_check_sse4_sha(void)
{
return cpu_has_sha && cpu_has_sse4_2;
@@ -179,7 +184,7 @@ static bool simd_check_avx_sha(void)
static bool simd_check_avx512f_sha_vl(void)
{
- return cpu_has_sha && cpu_has_avx512vl;
+ return cpu_has_sha && simd_check_avx512f_vl();
}
static bool simd_check_avx2_vaes(void)
@@ -189,13 +194,13 @@ static bool simd_check_avx2_vaes(void)
static bool simd_check_avx512bw_vaes(void)
{
- return cpu_has_aesni && cpu_has_vaes && cpu_has_avx512bw;
+ return cpu_has_aesni && cpu_has_vaes && simd_check_avx512bw();
}
static bool simd_check_avx512bw_vaes_vl(void)
{
return cpu_has_aesni && cpu_has_vaes &&
- cpu_has_avx512bw && cpu_has_avx512vl;
+ simd_check_avx512bw_vl();
}
static bool simd_check_avx2_vpclmulqdq(void)
@@ -205,22 +210,22 @@ static bool simd_check_avx2_vpclmulqdq(v
static bool simd_check_avx512bw_vpclmulqdq(void)
{
- return cpu_has_vpclmulqdq && cpu_has_avx512bw;
+ return cpu_has_vpclmulqdq && simd_check_avx512bw();
}
static bool simd_check_avx512bw_vpclmulqdq_vl(void)
{
- return cpu_has_vpclmulqdq && cpu_has_avx512bw && cpu_has_avx512vl;
+ return cpu_has_vpclmulqdq && simd_check_avx512bw_vl();
}
static bool simd_check_avx512vbmi2_vpclmulqdq(void)
{
- return cpu_has_avx512_vbmi2 && simd_check_avx512bw_vpclmulqdq();
+ return simd_check_avx512vbmi2() && simd_check_avx512bw_vpclmulqdq();
}
static bool simd_check_avx512vbmi2_vpclmulqdq_vl(void)
{
- return cpu_has_avx512_vbmi2 && simd_check_avx512bw_vpclmulqdq_vl();
+ return simd_check_avx512vbmi2() && simd_check_avx512bw_vpclmulqdq_vl();
}
static bool simd_check_sse2_gf(void)
@@ -235,12 +240,17 @@ static bool simd_check_avx2_gf(void)
static bool simd_check_avx512bw_gf(void)
{
- return cpu_has_gfni && cpu_has_avx512bw;
+ return cpu_has_gfni && simd_check_avx512bw();
}
static bool simd_check_avx512bw_gf_vl(void)
{
- return cpu_has_gfni && cpu_has_avx512vl;
+ return cpu_has_gfni && simd_check_avx512bw_vl();
+}
+
+static bool simd_check_avx512vnni(void)
+{
+ return cpu_has_avx512_vnni;
}
static bool simd_check_avx512fp16(void)
@@ -2800,7 +2810,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq %xmm1,32(%edx)...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovq_to_mem);
@@ -2824,7 +2834,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq 32(%edx),%xmm0...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovq_from_mem);
@@ -2947,7 +2957,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovdqu32 %zmm2,(%ecx){%k1}...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vmovdqu32_to_mem);
@@ -2977,7 +2987,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovdqu32 64(%edx),%zmm2{%k2}...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vmovdqu32_from_mem);
@@ -3002,7 +3012,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovdqu16 %zmm3,(%ecx){%k1}...");
- if ( stack_exec && cpu_has_avx512bw )
+ if ( stack_exec && simd_check_avx512bw() )
{
decl_insn(vmovdqu16_to_mem);
@@ -3034,7 +3044,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovdqu16 64(%edx),%zmm3{%k2}...");
- if ( stack_exec && cpu_has_avx512bw )
+ if ( stack_exec && simd_check_avx512bw() )
{
decl_insn(vmovdqu16_from_mem);
@@ -3162,7 +3172,7 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vmovsd %xmm5,16(%ecx){%k3}...");
memset(res, 0x88, 128);
memset(res + 20, 0x77, 8);
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vmovsd_masked_to_mem);
@@ -3197,7 +3207,7 @@ int main(int argc, char **argv)
}
printf("%-40s", "Testing vmovaps (%edx),%zmm7{%k3}{z}...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vmovaps_masked_from_mem);
@@ -3380,7 +3390,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %xmm3,32(%ecx)...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovd_to_mem);
@@ -3405,7 +3415,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd 32(%ecx),%xmm4...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovd_from_mem);
@@ -3595,7 +3605,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %xmm2,%ebx...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovd_to_reg);
@@ -3621,7 +3631,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovd %ebx,%xmm1...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovd_from_reg);
@@ -3723,7 +3733,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing {evex} vmovq %xmm11,32(%ecx)...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovq_to_mem2);
@@ -3813,7 +3823,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovq %xmm22,%rbx...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovq_to_reg);
@@ -4006,7 +4016,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovntdqa 64(%ecx),%zmm4...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vmovntdqa);
@@ -4602,7 +4612,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vcvtph2ps 32(%ecx),%zmm7{%k4}...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(evex_vcvtph2ps);
decl_insn(evex_vcvtps2ph);
@@ -4645,7 +4655,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vfixupimmpd $0,8(%edx){1to8},%zmm3,%zmm4...");
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vfixupimmpd);
static const struct {
@@ -4684,7 +4694,7 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vfpclasspsz $0x46,64(%edx),%k2...");
- if ( stack_exec && cpu_has_avx512dq )
+ if ( stack_exec && simd_check_avx512dq() )
{
decl_insn(vfpclassps);
@@ -4716,7 +4726,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vfpclassphz $0x46,128(%ecx),%k3...");
- if ( stack_exec && cpu_has_avx512_fp16 )
+ if ( stack_exec && simd_check_avx512fp16() )
{
decl_insn(vfpclassph);
@@ -4759,7 +4769,7 @@ int main(int argc, char **argv)
* on the mapping boundaries) that elements controlled by clear mask
* bits don't get accessed.
*/
- if ( stack_exec && cpu_has_avx512f )
+ if ( stack_exec && simd_check_avx512f() )
{
decl_insn(vpcompressd);
decl_insn(vpcompressq);
@@ -4861,7 +4871,7 @@ int main(int argc, char **argv)
}
#if __GNUC__ > 7 /* can't check for __AVX512VBMI2__ here */
- if ( stack_exec && cpu_has_avx512_vbmi2 )
+ if ( stack_exec && simd_check_avx512vbmi2() )
{
decl_insn(vpcompressb);
decl_insn(vpcompressw);
@@ -5049,7 +5059,7 @@ int main(int argc, char **argv)
}
printf("%-40s", "Testing vpdpwssd (%ecx),%{y,z}mmA,%{y,z}mmB...");
- if ( stack_exec && cpu_has_avx512_vnni && cpu_has_avx_vnni )
+ if ( stack_exec && simd_check_avx512vnni() && cpu_has_avx_vnni )
{
/* Do the same operation two ways and compare the results. */
decl_insn(vpdpwssd_vex1);
@@ -5104,7 +5114,7 @@ int main(int argc, char **argv)
printf("skipped\n");
printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5...");
- if ( stack_exec && cpu_has_avx512_fp16 )
+ if ( stack_exec && simd_check_avx512fp16() )
{
decl_insn(vmovsh_from_mem);
decl_insn(vmovw_to_gpr);
In preparation for having these also cover AVX10, use the helper
functions in preference to open-coded cpu_has_avx512* for those features
that AVX10 includes. Introduce a couple further helper functions where
they weren't previously needed.

Note that this way simd_check_avx512f_sha_vl() gains an AVX512F check
(which is likely benign) and simd_check_avx512bw_gf_vl() gains an
AVX512BW check (which was clearly missing).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base over dropping of Xeon Phi support.