From patchwork Fri Mar 15 10:39:27 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 10854457 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 83FAA1575 for ; Fri, 15 Mar 2019 10:41:12 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 67E0C2A934 for ; Fri, 15 Mar 2019 10:41:12 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 57E0B2A933; Fri, 15 Mar 2019 10:41:12 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id E2CCF2A933 for ; Fri, 15 Mar 2019 10:41:10 +0000 (UTC) Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kFP-0003PU-CK; Fri, 15 Mar 2019 10:39:31 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kFN-0003PA-Sh for xen-devel@lists.xenproject.org; Fri, 15 Mar 2019 10:39:29 +0000 X-Inumbo-ID: 9ad0a48c-470e-11e9-bd06-fb9f09d34ca8 Received: from prv1-mh.provo.novell.com (unknown [137.65.248.33]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id 9ad0a48c-470e-11e9-bd06-fb9f09d34ca8; Fri, 15 Mar 2019 10:39:27 +0000 (UTC) Received: from INET-PRV1-MTA by prv1-mh.provo.novell.com with Novell_GroupWise; Fri, 15 Mar 2019 04:39:26 -0600 Message-Id: <5C8B80DF020000780021F125@prv1-mh.provo.novell.com> X-Mailer: Novell GroupWise Internet Agent 18.1.0 Date: Fri, 15 Mar 2019 04:39:27 -0600 From: "Jan Beulich" To: "xen-devel" References: <5B6BF83602000078001DC548@prv1-mh.provo.novell.com> <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> In-Reply-To: <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> Mime-Version: 1.0 Content-Disposition: inline Subject: [Xen-devel] [PATCH v8 06/50] x86emul: basic AVX512VL testing X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: George Dunlap , Andrew Cooper , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" X-Virus-Scanned: ClamAV using ClamSMTP Test the 128- and 256-bit variants of the insns which have been implemented already. Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v6: Don't enable AVX512VL for scalar tests, nor for S/G ones with index wider than data. Re-base over changes earlier in the series. v4: Move OVR() additions into __AVX512VL__ conditional. v3: New. --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -63,7 +63,7 @@ avx2-sg-flts := 4 8 xop-vecs := $(avx-vecs) xop-ints := 1 2 4 8 xop-flts := $(avx-flts) -avx512f-vecs := 64 +avx512f-vecs := 64 16 32 avx512f-ints := 4 8 avx512f-flts := 4 8 --- a/tools/tests/x86_emulator/simd-fma.c +++ b/tools/tests/x86_emulator/simd-fma.c @@ -5,13 +5,13 @@ ENTRY(fma_test); #if VEC_SIZE < 16 && !defined(to_bool) # define to_bool(cmp) (!~(cmp)[0]) -#elif VEC_SIZE == 16 +#elif VEC_SIZE == 16 && !defined(__AVX512VL__) # if FLOAT_SIZE == 4 # define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0) # elif FLOAT_SIZE == 8 # define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0) # endif -#elif VEC_SIZE == 32 +#elif VEC_SIZE == 32 && !defined(__AVX512VL__) # if FLOAT_SIZE == 4 # define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0) # elif FLOAT_SIZE == 8 --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -539,7 +539,7 @@ static inline bool _to_bool(byte_vec_t b # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64)) # endif #endif -#if VEC_SIZE == 16 && defined(__SSE4_1__) +#if VEC_SIZE == 16 && defined(__SSE4_1__) && !defined(__AVX512VL__) # if INT_SIZE == 1 # define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y))) # define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y))) @@ -593,7 +593,7 @@ static inline bool _to_bool(byte_vec_t b # define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10) # endif #endif -#if VEC_SIZE == 32 && defined(__AVX__) +#if VEC_SIZE == 32 && defined(__AVX__) && !defined(__AVX512VL__) # if FLOAT_SIZE == 4 # define dot_product(x, y) ({ \ vec_t t_ = __builtin_ia32_dpps256(x, y, 0b11110001); \ --- a/tools/tests/x86_emulator/simd.h +++ b/tools/tests/x86_emulator/simd.h @@ -92,6 +92,15 @@ typedef long long __attribute__((vector_ #ifdef __AVX512F__ +# if VEC_SIZE > ELEM_SIZE && (defined(VEC_MAX) ? VEC_MAX : VEC_SIZE) < 64 +# pragma GCC target ( "avx512vl" ) +# endif + +# define REN(insn, old, new) \ + asm ( ".macro v" #insn #old " o:vararg \n\t" \ + "v" #insn #new " \\o \n\t" \ + ".endm" ) + /* * The original plan was to effect use of EVEX encodings for scalar as well as * 128- and 256-bit insn variants by restricting the compiler to use (on 64-bit @@ -135,25 +144,88 @@ asm ( ".macro override insn \n\t" # define OVR_FP(n) OVR_VFP(n); OVR_SFP(n) # define OVR_INT(n) OVR_BW(n); OVR_DQ(n) +OVR_INT(broadcast); OVR_SFP(broadcast); OVR_SFP(comi); OVR_FP(add); +OVR_INT(add); OVR_FP(div); OVR(extractps); OVR_FMA(fmadd, FP); +OVR_FMA(fmaddsub, VFP); OVR_FMA(fmsub, FP); +OVR_FMA(fmsubadd, VFP); OVR_FMA(fnmadd, FP); OVR_FMA(fnmsub, FP); OVR(insertps); OVR_FP(max); +OVR_INT(maxs); +OVR_INT(maxu); OVR_FP(min); +OVR_INT(mins); +OVR_INT(minu); OVR(movd); OVR(movq); OVR_SFP(mov); +OVR_VFP(mova); +OVR_VFP(movnt); +OVR_VFP(movu); OVR_FP(mul); +OVR_VFP(shuf); +OVR_INT(sll); +OVR_DQ(sllv); OVR_FP(sqrt); +OVR_INT(sra); +OVR_DQ(srav); +OVR_INT(srl); +OVR_DQ(srlv); OVR_FP(sub); +OVR_INT(sub); OVR_SFP(ucomi); +OVR_VFP(unpckh); +OVR_VFP(unpckl); + +# ifdef __AVX512VL__ +# if ELEM_SIZE == 8 && defined(__AVX512DQ__) +REN(extract, f128, f64x2); +REN(extract, i128, i64x2); +REN(insert, f128, f64x2); +REN(insert, i128, i64x2); +# else +REN(extract, f128, f32x4); +REN(extract, i128, i32x4); +REN(insert, f128, f32x4); +REN(insert, i128, i32x4); +# endif +# if ELEM_SIZE == 8 +REN(movdqa, , 64); +REN(movdqu, , 64); +REN(pand, , q); +REN(pandn, , q); +REN(por, , q); +REN(pxor, , q); +# else +# if ELEM_SIZE == 1 && defined(__AVX512BW__) +REN(movdq, a, u8); +REN(movdqu, , 8); +# elif ELEM_SIZE == 2 && defined(__AVX512BW__) +REN(movdq, a, u16); +REN(movdqu, , 16); +# else +REN(movdqa, , 32); +REN(movdqu, , 32); +# endif +REN(pand, , d); +REN(pandn, , d); +REN(por, , d); +REN(pxor, , d); +# endif +OVR(movntdq); +OVR(movntdqa); +OVR(pmulld); +OVR(pmuldq); +OVR(pmuludq); +# endif # undef OVR_VFP # undef OVR_SFP --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -88,6 +88,11 @@ static bool simd_check_avx512f(void) } #define simd_check_avx512f_opmask simd_check_avx512f +static bool simd_check_avx512f_vl(void) +{ + return cpu_has_avx512f && cpu_has_avx512vl; +} + static bool simd_check_avx512dq(void) { return cpu_has_avx512dq; @@ -142,11 +147,21 @@ static const struct { .check_cpu = simd_check_ ## feat, \ .set_regs = simd_set_regs, \ .check_regs = simd_check_regs } +#define AVX512VL_(bits, desc, feat, form) \ + { .code = feat ## _x86_ ## bits ## _D ## _ ## form, \ + .size = sizeof(feat ## _x86_ ## bits ## _D ## _ ## form), \ + .bitness = bits, .name = "AVX512" #desc, \ + .check_cpu = simd_check_ ## feat ## _vl, \ + .set_regs = simd_set_regs, \ + .check_regs = simd_check_regs } #ifdef __x86_64__ # define SIMD(desc, feat, form) SIMD_(64, desc, feat, form), \ SIMD_(32, desc, feat, form) +# define AVX512VL(desc, feat, form) AVX512VL_(64, desc, feat, form), \ + AVX512VL_(32, desc, feat, form) #else # define SIMD(desc, feat, form) SIMD_(32, desc, feat, form) +# define AVX512VL(desc, feat, form) AVX512VL_(32, desc, feat, form) #endif SIMD(3DNow! single, _3dnow, 8f4), SIMD(SSE scalar single, sse, f4), @@ -257,6 +272,20 @@ static const struct { SIMD(AVX512F u32x16, avx512f, 64u4), SIMD(AVX512F s64x8, avx512f, 64i8), SIMD(AVX512F u64x8, avx512f, 64u8), + AVX512VL(VL f32x4, avx512f, 16f4), + AVX512VL(VL f64x2, avx512f, 16f8), + AVX512VL(VL f32x8, avx512f, 32f4), + AVX512VL(VL f64x4, avx512f, 32f8), + AVX512VL(VL s32x4, avx512f, 16i4), + AVX512VL(VL u32x4, avx512f, 16u4), + AVX512VL(VL s32x8, avx512f, 32i4), + AVX512VL(VL u32x8, avx512f, 32u4), + AVX512VL(VL s64x2, avx512f, 16i8), + AVX512VL(VL u64x2, avx512f, 16u8), + AVX512VL(VL s64x4, avx512f, 32i8), + AVX512VL(VL u64x4, avx512f, 32u8), +#undef AVX512VL_ +#undef AVX512VL #undef SIMD_ #undef SIMD };