diff mbox series

[v8,48/50] x86emul: add an AES/VAES test case to the harness

Message ID 5C8B8771020000780021F329@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series x86emul: remaining AVX512 support | expand

Commit Message

Jan Beulich March 15, 2019, 11:07 a.m. UTC
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v8: New.

Comments

Andrew Cooper June 21, 2019, 1:36 p.m. UTC | #1
On 15/03/2019 11:07, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This feels like it should be folded with patch 45 (or, perhaps easier, 45
moved later and folded into this one).  The exact ordering of patches
really doesn't matter.

> @@ -91,6 +95,16 @@ static bool simd_check_xop(void)
>      return cpu_has_xop;
>  }
>  
> +static bool simd_check_ssse3_aes(void)
> +{
> +    return cpu_has_aesni && cpu_has_ssse3;
> +}
> +
> +static bool simd_check_avx_aes(void)
> +{
> +    return cpu_has_aesni && cpu_has_avx;
> +}
> +
>  static bool simd_check_avx512f(void)
>  {
>      return cpu_has_avx512f;
> @@ -141,6 +155,22 @@ static bool simd_check_avx512vbmi_vl(voi
>      return cpu_has_avx512_vbmi && cpu_has_avx512vl;
>  }
>  
> +static bool simd_check_avx2_vaes(void)
> +{
> +    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx2;
> +}
> +
> +static bool simd_check_avx512bw_vaes(void)
> +{
> +    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx512bw;
> +}
> +
> +static bool simd_check_avx512bw_vaes_vl(void)
> +{
> +    return cpu_has_aesni && cpu_has_vaes &&
> +           cpu_has_avx512bw && cpu_has_avx512vl;
> +}

I've got the same concerns WRT feature tests as with the previous
patch.  Everything else LGTM.

~Andrew
Jan Beulich June 21, 2019, 2:04 p.m. UTC | #2
>>> On 21.06.19 at 15:36, <andrew.cooper3@citrix.com> wrote:
> On 15/03/2019 11:07, Jan Beulich wrote:
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> This feels like it should be folded with patch 45 (or, perhaps easier, 45
> moved later and folded into this one).  The exact ordering of patches
> really doesn't matter.

Not really imo - we've had AESNI support even before this series.
Test harness coverage for everything gets added here. Apart
from this the late placement in the series is attributed to its history:
It took me a while after having finished the main AVX512 work to
find time to actually come up with at least marginally sensible tests
here.

I'd prefer if things remained split the way they are.

>> @@ -91,6 +95,16 @@ static bool simd_check_xop(void)
>>      return cpu_has_xop;
>>  }
>>  
>> +static bool simd_check_ssse3_aes(void)
>> +{
>> +    return cpu_has_aesni && cpu_has_ssse3;
>> +}
>> +
>> +static bool simd_check_avx_aes(void)
>> +{
>> +    return cpu_has_aesni && cpu_has_avx;
>> +}
>> +
>>  static bool simd_check_avx512f(void)
>>  {
>>      return cpu_has_avx512f;
>> @@ -141,6 +155,22 @@ static bool simd_check_avx512vbmi_vl(voi
>>      return cpu_has_avx512_vbmi && cpu_has_avx512vl;
>>  }
>>  
>> +static bool simd_check_avx2_vaes(void)
>> +{
>> +    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx2;
>> +}
>> +
>> +static bool simd_check_avx512bw_vaes(void)
>> +{
>> +    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx512bw;
>> +}
>> +
>> +static bool simd_check_avx512bw_vaes_vl(void)
>> +{
>> +    return cpu_has_aesni && cpu_has_vaes &&
>> +           cpu_has_avx512bw && cpu_has_avx512vl;
>> +}
> 
> I've got the same concerns WRT feature tests as with the previous
> patch.  Everything else LGTM.

Right - let's settle on that aspect there first.

Jan
Andrew Cooper June 21, 2019, 2:20 p.m. UTC | #3
On 21/06/2019 15:04, Jan Beulich wrote:
>>>> On 21.06.19 at 15:36, <andrew.cooper3@citrix.com> wrote:
>> On 15/03/2019 11:07, Jan Beulich wrote:
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> This feels like it should be folded with patch 45 (or, perhaps easier, 45
>> moved later and folded into this one).  The exact ordering of patches
>> really doesn't matter.
> Not really imo - we've had AESNI support even before this series.
> Test harness coverage for everything gets added here. Apart
> from this the late placement in the series is attributed to its history:
> It took me a while after having finished the main AVX512 work to
> find time to actually come up with at least marginally sensible tests
> here.
>
> I'd prefer if things remained split the way they are.

Fair enough.  Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff mbox series

Patch

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -19,8 +19,9 @@  CFLAGS += $(CFLAGS_xeninclude)
 SIMD := 3dnow sse sse2 sse4 avx avx2 xop avx512f avx512bw avx512dq avx512er avx512vbmi
 FMA := fma4 fma
 SG := avx2-sg avx512f-sg avx512vl-sg
+AES := ssse3-aes avx-aes avx2-vaes avx512bw-vaes
 GF := sse2-gf avx2-gf avx512bw-gf
-TESTCASES := blowfish $(SIMD) $(FMA) $(SG) $(GF)
+TESTCASES := blowfish $(SIMD) $(FMA) $(SG) $(AES) $(GF)
 
 OPMASK := avx512f avx512dq avx512bw
 
@@ -143,6 +144,10 @@  $(1)-cflags := \
 	   $(foreach flt,$($(1)-flts), \
 	     "-D_$(vec)x$(idx)f$(flt) -m$(1:-sg=) $(call non-sse,$(1)) -Os -DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DFLOAT_SIZE=$(flt)")))
 endef
+define simd-aes-defs
+$(1)-cflags := $(foreach vec,$($(patsubst %-aes,sse,$(1))-vecs) $($(patsubst %-vaes,%,$(1))-vecs), \
+	         "-D_$(vec) -maes $(addprefix -m,$(subst -,$(space),$(1))) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec)")
+endef
 define simd-gf-defs
 $(1)-cflags := $(foreach vec,$($(1:-gf=)-vecs), \
 	         "-D_$(vec) -mgfni -m$(1:-gf=) $(call non-sse,$(1)) -Os -DVEC_SIZE=$(vec)")
@@ -153,6 +158,7 @@  endef
 
 $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor))))
 $(foreach flavor,$(SG),$(eval $(call simd-sg-defs,$(flavor))))
+$(foreach flavor,$(AES),$(eval $(call simd-aes-defs,$(flavor))))
 $(foreach flavor,$(GF),$(eval $(call simd-gf-defs,$(flavor))))
 $(foreach flavor,$(OPMASK),$(eval $(call opmask-defs,$(flavor))))
 
@@ -203,10 +209,13 @@  $(addsuffix .c,$(FMA)):
 $(addsuffix .c,$(SG)):
 	ln -sf simd-sg.c $@
 
+$(addsuffix .c,$(AES)):
+	ln -sf simd-aes.c $@
+
 $(addsuffix .c,$(GF)):
 	ln -sf simd-gf.c $@
 
-$(addsuffix .h,$(SIMD) $(FMA) $(SG) $(GF)): simd.h
+$(addsuffix .h,$(SIMD) $(FMA) $(SG) $(AES) $(GF)): simd.h
 
 xop.h avx512f.h: simd-fma.c
 
--- /dev/null
+++ b/tools/tests/x86_emulator/simd-aes.c
@@ -0,0 +1,102 @@ 
+#define UINT_SIZE 1
+
+#include "simd.h"
+ENTRY(aes_test);
+
+#if VEC_SIZE == 16
+# define AES(op, a...) __builtin_ia32_vaes ## op ## _v16qi(a)
+# define imc(x) ((vec_t)__builtin_ia32_aesimc128((vdi_t)(x)))
+#elif VEC_SIZE == 32
+# define AES(op, a...) __builtin_ia32_vaes ## op ## _v32qi(a)
+# define imc(x) ({ \
+    vec_t r_; \
+    unsigned char __attribute__((vector_size(16))) t_; \
+    asm ( "vaesimc (%3), %x0\n\t" \
+          "vaesimc 16(%3), %1\n\t" \
+          "vinserti128 $1, %1, %0, %0" \
+          : "=&v" (r_), "=&v" (t_) \
+          : "m" (x), "r" (&(x)) ); \
+    r_; \
+})
+#elif VEC_SIZE == 64
+# define AES(op, a...) __builtin_ia32_vaes ## op ## _v64qi(a)
+# define imc(x) ({ \
+    vec_t r_; \
+    unsigned char __attribute__((vector_size(16))) t_; \
+    asm ( "vaesimc (%3), %x0\n\t" \
+          "vaesimc 1*16(%3), %1\n\t" \
+          "vinserti32x4 $1, %1, %0, %0\n\t" \
+          "vaesimc 2*16(%3), %1\n\t" \
+          "vinserti32x4 $2, %1, %0, %0\n\t" \
+          "vaesimc 3*16(%3), %1\n\t" \
+          "vinserti32x4 $3, %1, %0, %0" \
+          : "=&v" (r_), "=&v" (t_) \
+          : "m" (x), "r" (&(x)) ); \
+    r_; \
+})
+#endif
+
+#ifdef __AVX512BW__
+# define ALL_TRUE (~0ULL >> (64 - ELEM_COUNT))
+# define eq(x, y) (B(pcmpeqb, _mask, (vqi_t)(x), (vqi_t)(y), -1) == ALL_TRUE)
+# define aes(op, x, y) ((vec_t)AES(op, (vqi_t)(x), (vqi_t)(y)))
+#else
+# if defined(__AVX2__) && VEC_SIZE == 32
+#  define to_bool(cmp) B(ptestc, , cmp, (vdi_t){} == 0)
+#  define aes(op, x, y) ((vec_t)AES(op, (vqi_t)(x), (vqi_t)(y)))
+# else
+#  define to_bool(cmp) (__builtin_ia32_pmovmskb128(cmp) == 0xffff)
+#  define aes(op, x, y) ((vec_t)__builtin_ia32_aes ## op ## 128((vdi_t)(x), (vdi_t)(y)))
+# endif
+# define eq(x, y) to_bool((x) == (y))
+#endif
+
+int aes_test(void) /* AES self-check: returns 0 on success, else the __LINE__ of the failed comparison. */
+{
+    unsigned int i;
+    vec_t src, zero = {};
+
+    for ( i = 0; i < ELEM_COUNT; ++i )
+        src[i] = i; /* seed element k with the value k; i == ELEM_COUNT after this loop */
+
+    do {
+        vec_t x, y;
+
+        touch(src); /* NOTE(review): touch() is defined elsewhere, presumably an optimisation barrier - confirm */
+        x = imc(src); /* check 1 reference: imc() applied directly to src */
+        touch(src);
+
+        touch(zero);
+        y = aes(enclast, src, zero); /* enclast, then dec, both with an all-zero second operand */
+        touch(zero);
+        y = aes(dec, y, zero);
+
+        if ( !eq(x, y) ) return __LINE__; /* check 1: dec(enclast(src)) must match imc(src) */
+
+        touch(zero);
+        x = aes(declast, src, zero); /* check 2: declast, then enc, then imc, zero second operand throughout */
+        touch(zero);
+        y = aes(enc, x, zero);
+        touch(y);
+        x = imc(y);
+
+        if ( !eq(x, src) ) return __LINE__; /* check 2: the chain must reproduce src */
+
+#if VEC_SIZE == 16
+        touch(src);
+        x = (vec_t)__builtin_ia32_aeskeygenassist128((vdi_t)src, 0); /* check 3 (16-byte vectors only): keygenassist with immediate 0 */
+        touch(src);
+        y = (vec_t)__builtin_ia32_pshufb128((vqi_t)x, /* permute the bytes of x with the index vector below */
+                                            (vqi_t){  7,  4,  5,  6,
+                                                      1,  2,  3,  0,
+                                                     15, 12, 13, 14,
+                                                      9, 10, 11,  8 });
+        if ( !eq(x, y) ) return __LINE__; /* check 3: result must be unchanged by that permutation */
+#endif
+
+        src += ELEM_COUNT; /* add ELEM_COUNT to every element of src for the next round */
+        i += ELEM_COUNT;
+    } while ( i <= 256 ); /* i starts at ELEM_COUNT; repeat until it exceeds 256 */
+
+    return 0; /* every comparison passed */
+}
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -340,6 +340,10 @@  REN(pandn, , d);
 REN(por, , d);
 REN(pxor, , d);
 #  endif
+OVR(aesdec);
+OVR(aesdeclast);
+OVR(aesenc);
+OVR(aesenclast);
 OVR(cvtpd2dqx);
 OVR(cvtpd2dqy);
 OVR(cvtpd2psx);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -12,12 +12,15 @@  asm ( ".pushsection .test, \"ax\", @prog
 #include "sse.h"
 #include "sse2.h"
 #include "sse2-gf.h"
+#include "ssse3-aes.h"
 #include "sse4.h"
 #include "avx.h"
+#include "avx-aes.h"
 #include "fma4.h"
 #include "fma.h"
 #include "avx2.h"
 #include "avx2-sg.h"
+#include "avx2-vaes.h"
 #include "avx2-gf.h"
 #include "xop.h"
 #include "avx512f-opmask.h"
@@ -27,6 +30,7 @@  asm ( ".pushsection .test, \"ax\", @prog
 #include "avx512f-sg.h"
 #include "avx512vl-sg.h"
 #include "avx512bw.h"
+#include "avx512bw-vaes.h"
 #include "avx512bw-gf.h"
 #include "avx512dq.h"
 #include "avx512er.h"
@@ -91,6 +95,16 @@  static bool simd_check_xop(void)
     return cpu_has_xop;
 }
 
+static bool simd_check_ssse3_aes(void) /* True only when both AES-NI and SSSE3 are reported. */
+{
+    return cpu_has_aesni && cpu_has_ssse3;
+}
+
+static bool simd_check_avx_aes(void) /* True only when both AES-NI and AVX (cpu_has_avx) are reported. */
+{
+    return cpu_has_aesni && cpu_has_avx;
+}
+
 static bool simd_check_avx512f(void)
 {
     return cpu_has_avx512f;
@@ -141,6 +155,22 @@  static bool simd_check_avx512vbmi_vl(voi
     return cpu_has_avx512_vbmi && cpu_has_avx512vl;
 }
 
+static bool simd_check_avx2_vaes(void) /* True only when AES-NI, VAES and AVX2 are all reported. */
+{
+    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx2;
+}
+
+static bool simd_check_avx512bw_vaes(void) /* True only when AES-NI, VAES and AVX512BW are all reported. */
+{
+    return cpu_has_aesni && cpu_has_vaes && cpu_has_avx512bw;
+}
+
+static bool simd_check_avx512bw_vaes_vl(void) /* As simd_check_avx512bw_vaes(), additionally requiring AVX512VL. */
+{
+    return cpu_has_aesni && cpu_has_vaes &&
+           cpu_has_avx512bw && cpu_has_avx512vl;
+}
+
 static bool simd_check_sse2_gf(void)
 {
     return cpu_has_gfni && cpu_has_sse2;
@@ -319,6 +349,8 @@  static const struct {
     SIMD(XOP i16x16,              xop,      32i2),
     SIMD(XOP i32x8,               xop,      32i4),
     SIMD(XOP i64x4,               xop,      32i8),
+    SIMD(AES (legacy),      ssse3_aes,        16),
+    SIMD(AES (VEX/x16),       avx_aes,        16),
     SIMD(OPMASK/w,     avx512f_opmask,         2),
     SIMD(OPMASK+DQ/b, avx512dq_opmask,         1),
     SIMD(OPMASK+DQ/w, avx512dq_opmask,         2),
@@ -418,6 +450,10 @@  static const struct {
     AVX512VL(_VBMI+VL u16x8, avx512vbmi,    16u2),
     AVX512VL(_VBMI+VL s16x16, avx512vbmi,   32i2),
     AVX512VL(_VBMI+VL u16x16, avx512vbmi,   32u2),
+    SIMD(VAES (VEX/x32),    avx2_vaes,        32),
+    SIMD(VAES (EVEX/x64), avx512bw_vaes,      64),
+    AVX512VL(VL+VAES (x16), avx512bw_vaes,    16),
+    AVX512VL(VL+VAES (x32), avx512bw_vaes,    32),
     SIMD(GFNI (legacy),       sse2_gf,        16),
     SIMD(GFNI (VEX/x16),      avx2_gf,        16),
     SIMD(GFNI (VEX/x32),      avx2_gf,        32),
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -125,10 +125,12 @@  static inline bool xcr0_mask(uint64_t ma
 #define cpu_has_sse        cp.basic.sse
 #define cpu_has_sse2       cp.basic.sse2
 #define cpu_has_sse3       cp.basic.sse3
+#define cpu_has_ssse3      cp.basic.ssse3
 #define cpu_has_fma       (cp.basic.fma && xcr0_mask(6))
 #define cpu_has_sse4_1     cp.basic.sse4_1
 #define cpu_has_sse4_2     cp.basic.sse4_2
 #define cpu_has_popcnt     cp.basic.popcnt
+#define cpu_has_aesni      cp.basic.aesni
 #define cpu_has_avx       (cp.basic.avx  && xcr0_mask(6))
 #define cpu_has_f16c      (cp.basic.f16c && xcr0_mask(6))