From patchwork Fri Mar 15 10:41:03 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 10854463 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id EB41E1575 for ; Fri, 15 Mar 2019 10:42:41 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D2F092A933 for ; Fri, 15 Mar 2019 10:42:41 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C711D2A936; Fri, 15 Mar 2019 10:42:41 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 470F52A933 for ; Fri, 15 Mar 2019 10:42:41 +0000 (UTC) Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kGw-0004HN-KW; Fri, 15 Mar 2019 10:41:06 +0000 Received: from us1-rack-dfw2.inumbo.com ([104.130.134.6]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kGv-0004HD-RX for xen-devel@lists.xenproject.org; Fri, 15 Mar 2019 10:41:05 +0000 X-Inumbo-ID: d52b4836-470e-11e9-bc90-bc764e045a96 Received: from prv1-mh.provo.novell.com (unknown [137.65.248.33]) by us1-rack-dfw2.inumbo.com (Halon) with ESMTPS id d52b4836-470e-11e9-bc90-bc764e045a96; Fri, 15 Mar 2019 10:41:04 +0000 (UTC) Received: from INET-PRV1-MTA by prv1-mh.provo.novell.com with Novell_GroupWise; Fri, 15 Mar 2019 04:41:04 -0600 Message-Id: <5C8B813F020000780021F164@prv1-mh.provo.novell.com> X-Mailer: Novell GroupWise Internet Agent 18.1.0 Date: Fri, 15 Mar 2019 04:41:03 -0600 From: "Jan Beulich" To: "xen-devel" References: <5B6BF83602000078001DC548@prv1-mh.provo.novell.com> <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> In-Reply-To: <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> Mime-Version: 1.0 Content-Disposition: inline Subject: [Xen-devel] [PATCH v8 09/50] x86emul: support AVX512{F, BW} integer unpack insns X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: George Dunlap , Andrew Cooper , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" X-Virus-Scanned: ClamAV using ClamSMTP There's once again one extra twobyte_table[] entry which gets its Disp8 shift value set right away without getting support implemented just yet, again to avoid needlessly splitting groups of entries. Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v8: Re-base. v6: Re-base over changes earlier in the series. v4: Move OVR() additions into __AVX512VL__ conditional. v3: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -229,6 +229,10 @@ static const struct test avx512f_all[] = INSN(pternlog, 66, 0f3a, 25, vl, dq, vl), INSN(ptestm, 66, 0f38, 27, vl, dq, vl), INSN(ptestnm, f3, 0f38, 27, vl, dq, vl), + INSN(punpckhdq, 66, 0f, 6a, vl, d, vl), + INSN(punpckhqdq, 66, 0f, 6d, vl, q, vl), + INSN(punpckldq, 66, 0f, 62, vl, d, vl), + INSN(punpcklqdq, 66, 0f, 6c, vl, q, vl), INSN(pxor, 66, 0f, ef, vl, dq, vl), INSN_PFP(shuf, 0f, c6), INSN_FP(sqrt, 0f, 51), @@ -327,6 +331,10 @@ static const struct test avx512bw_all[] INSN(psubw, 66, 0f, f9, vl, w, vl), INSN(ptestm, 66, 0f38, 26, vl, bw, vl), INSN(ptestnm, f3, 0f38, 26, vl, bw, vl), + INSN(punpckhbw, 66, 0f, 68, vl, b, vl), + INSN(punpckhwd, 66, 0f, 69, vl, w, vl), + INSN(punpcklbw, 66, 0f, 60, vl, b, vl), + INSN(punpcklwd, 66, 0f, 61, vl, w, vl), }; static const struct test avx512bw_128[] = { --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -300,6 +300,10 @@ static inline bool _to_bool(byte_vec_t b asm ( "vpbroadcastd %k1, %0" : "=v" (t_) : "r" (x) ); \ t_; \ }) +# if VEC_SIZE == 16 +# define interleave_hi(x, y) ((vec_t)B(punpckhdq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) +# define interleave_lo(x, y) ((vec_t)B(punpckldq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) +# endif # define mix(x, y) ((vec_t)B(movdqa32_, _mask, (vsi_t)(x), (vsi_t)(y), \ (0b0101010101010101 & ((1 << ELEM_COUNT) - 1)))) # define shrink1(x) ((half_t)B(pmovqd, _mask, (vdi_t)(x), (vsi_half_t){}, ~0)) @@ -317,6 +321,10 @@ static inline bool _to_bool(byte_vec_t b t_; \ }) # endif +# if VEC_SIZE == 16 +# define interleave_hi(x, y) ((vec_t)B(punpckhqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) +# define interleave_lo(x, y) ((vec_t)B(punpcklqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) +# endif # define mix(x, y) ((vec_t)B(movdqa64_, _mask, (vdi_t)(x), (vdi_t)(y), 0b01010101)) # endif # if INT_SIZE == 4 --- a/tools/tests/x86_emulator/simd.h +++ b/tools/tests/x86_emulator/simd.h @@ -252,6 +252,10 @@ OVR(pmovzxwq); OVR(pmulld); OVR(pmuldq); OVR(pmuludq); +OVR(punpckhdq); +OVR(punpckhqdq); +OVR(punpckldq); +OVR(punpcklqdq); # endif # undef OVR_VFP --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -312,10 +312,10 @@ static const struct twobyte_table { [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, - [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other }, + [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl }, [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, - [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other }, - [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int }, + [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl }, + [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 }, [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl }, [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other }, @@ -6681,6 +6681,12 @@ x86_emulate( case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm */ generate_exception_if(evex.opmsk, EXC_UD); /* fall through */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x60): /* vpunpcklbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x61): /* vpunpcklwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x68): /* vpunpckhbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x69): /* vpunpckhwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + op_bytes = 16 << evex.lr; + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */ @@ -6708,6 +6714,13 @@ x86_emulate( elem_bytes = 1 << (b & 1); goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_66(0x0f, 0x62): /* vpunpckldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x6a): /* vpunpckhdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + generate_exception_if(evex.w, EXC_UD); + fault_suppression = false; + op_bytes = 16 << evex.lr; + goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */ case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */ op_bytes = 16 << evex.lr; @@ -6734,6 +6747,10 @@ x86_emulate( avx512_vlen_check(false); goto simd_zmm; + case X86EMUL_OPC_EVEX_66(0x0f, 0x6c): /* vpunpcklqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(0x0f, 0x6d): /* vpunpckhqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + fault_suppression = false; + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */