From patchwork Fri Mar 15 10:37:20 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 10854451 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4B5A71575 for ; Fri, 15 Mar 2019 10:39:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2E6A82A16A for ; Fri, 15 Mar 2019 10:39:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 1FACC2A191; Fri, 15 Mar 2019 10:39:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 661612A16A for ; Fri, 15 Mar 2019 10:39:08 +0000 (UTC) Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kDM-00032j-5g; Fri, 15 Mar 2019 10:37:24 +0000 Received: from us1-rack-dfw2.inumbo.com ([104.130.134.6]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1h4kDL-00032X-1l for xen-devel@lists.xenproject.org; Fri, 15 Mar 2019 10:37:23 +0000 X-Inumbo-ID: 505686fe-470e-11e9-bc90-bc764e045a96 Received: from prv1-mh.provo.novell.com (unknown [137.65.248.33]) by us1-rack-dfw2.inumbo.com (Halon) with ESMTPS id 505686fe-470e-11e9-bc90-bc764e045a96; Fri, 15 Mar 2019 10:37:21 +0000 (UTC) Received: from INET-PRV1-MTA by prv1-mh.provo.novell.com with Novell_GroupWise; Fri, 15 Mar 2019 04:37:21 -0600 Message-Id: 
<5C8B8060020000780021F11C@prv1-mh.provo.novell.com> X-Mailer: Novell GroupWise Internet Agent 18.1.0 Date: Fri, 15 Mar 2019 04:37:20 -0600 From: "Jan Beulich" To: "xen-devel" References: <5B6BF83602000078001DC548@prv1-mh.provo.novell.com> <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> In-Reply-To: <5C8B7EC0020000780021F10B@prv1-mh.provo.novell.com> Mime-Version: 1.0 Content-Disposition: inline Subject: [Xen-devel] [PATCH v8 03/50] x86emul: support AVX512{F, BW, DQ} insert insns X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: George Dunlap , Andrew Cooper , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" X-Virus-Scanned: ClamAV using ClamSMTP Also correct the comment of the AVX form of VINSERTPS. Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v7: Re-base. v6: Don't refuse to emulate VINSERTPS without AVX512VL. v4: Make use of d8s_dq64. v3: New. 
--- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -213,6 +213,7 @@ static const struct test avx512f_all[] = static const struct test avx512f_128[] = { INSN(extractps, 66, 0f3a, 17, el, d, el), + INSN(insertps, 66, 0f3a, 21, el, d, el), INSN(mov, 66, 0f, 6e, el, dq64, el), INSN(mov, 66, 0f, 7e, el, dq64, el), INSN(movq, f3, 0f, 7e, el, q, el), @@ -224,12 +225,16 @@ static const struct test avx512f_no128[] INSN(broadcastsd, 66, 0f38, 19, el, q, el), INSN(extractf32x4, 66, 0f3a, 19, el_4, d, vl), INSN(extracti32x4, 66, 0f3a, 39, el_4, d, vl), + INSN(insertf32x4, 66, 0f3a, 18, el_4, d, vl), + INSN(inserti32x4, 66, 0f3a, 38, el_4, d, vl), }; static const struct test avx512f_512[] = { INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl), INSN(extractf64x4, 66, 0f3a, 1b, el_4, q, vl), INSN(extracti64x4, 66, 0f3a, 3b, el_4, q, vl), + INSN(insertf64x4, 66, 0f3a, 1a, el_4, q, vl), + INSN(inserti64x4, 66, 0f3a, 3a, el_4, q, vl), }; static const struct test avx512bw_all[] = { @@ -289,6 +294,8 @@ static const struct test avx512bw_128[] INSN(pextrb, 66, 0f3a, 14, el, b, el), // pextrw, 66, 0f, c5, w INSN(pextrw, 66, 0f3a, 15, el, w, el), + INSN(pinsrb, 66, 0f3a, 20, el, b, el), + INSN(pinsrw, 66, 0f, c4, el, w, el), }; static const struct test avx512dq_all[] = { @@ -301,6 +308,7 @@ static const struct test avx512dq_all[] static const struct test avx512dq_128[] = { INSN(pextr, 66, 0f3a, 16, el, dq64, el), + INSN(pinsr, 66, 0f3a, 22, el, dq64, el), }; static const struct test avx512dq_no128[] = { @@ -308,12 +316,16 @@ static const struct test avx512dq_no128[ INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl), INSN(extractf64x2, 66, 0f3a, 19, el_2, q, vl), INSN(extracti64x2, 66, 0f3a, 39, el_2, q, vl), + INSN(insertf64x2, 66, 0f3a, 18, el_2, q, vl), + INSN(inserti64x2, 66, 0f3a, 38, el_2, q, vl), }; static const struct test avx512dq_512[] = { INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl), INSN(extractf32x8, 66, 0f3a, 1b, el_8, d, vl), 
INSN(extracti32x8, 66, 0f3a, 3b, el_8, d, vl), + INSN(insertf32x8, 66, 0f3a, 1a, el_8, d, vl), + INSN(inserti32x8, 66, 0f3a, 3a, el_8, d, vl), }; static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 }; --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -360,7 +360,7 @@ static const struct twobyte_table { [0xc1] = { DstMem|SrcReg|ModRM }, [0xc2] = { DstImplicit|SrcImmByte|ModRM, simd_any_fp, d8s_vl }, [0xc3] = { DstMem|SrcReg|ModRM|Mov }, - [0xc4] = { DstReg|SrcImmByte|ModRM, simd_packed_int }, + [0xc4] = { DstReg|SrcImmByte|ModRM, simd_packed_int, 1 }, [0xc5] = { DstReg|SrcImmByte|ModRM|Mov }, [0xc6] = { DstImplicit|SrcImmByte|ModRM, simd_packed_fp, d8s_vl }, [0xc7] = { ImplicitOps|ModRM }, @@ -516,17 +516,19 @@ static const struct ext0f3a_table { [0x15] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 1 }, [0x16] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = d8s_dq64 }, [0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1, .d8s = 2 }, - [0x18] = { .simd_size = simd_128 }, + [0x18] = { .simd_size = simd_128, .d8s = 4 }, [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 }, + [0x1a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 }, [0x1b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 }, [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 }, [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, - [0x20] = { .simd_size = simd_none }, - [0x21] = { .simd_size = simd_other }, - [0x22] = { .simd_size = simd_none }, + [0x20] = { .simd_size = simd_none, .d8s = 0 }, + [0x21] = { .simd_size = simd_other, .d8s = 2 }, + [0x22] = { .simd_size = simd_none, .d8s = d8s_dq64 }, [0x25] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x30 ... 
0x33] = { .simd_size = simd_other, .two_op = 1 }, - [0x38] = { .simd_size = simd_128 }, + [0x38] = { .simd_size = simd_128, .d8s = 4 }, + [0x3a] = { .simd_size = simd_256, .d8s = d8s_vl_by_2 }, [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1, .d8s = 4 }, [0x3b] = { .simd_size = simd_256, .to_mem = 1, .two_op = 1, .d8s = d8s_vl_by_2 }, [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, @@ -2586,6 +2588,7 @@ x86_decode_twobyte( ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); /* fall through */ case X86EMUL_OPC_VEX_66(0, 0xc4): /* vpinsrw */ + case X86EMUL_OPC_EVEX_66(0, 0xc4): /* vpinsrw */ state->desc = DstReg | SrcMem16; break; @@ -2688,6 +2691,7 @@ x86_decode_0f3a( case X86EMUL_OPC_66(0, 0x20): /* pinsrb */ case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */ + case X86EMUL_OPC_EVEX_66(0, 0x20): /* vpinsrb */ state->desc = DstImplicit | SrcMem; if ( modrm_mod != 3 ) state->desc |= ByteOp; @@ -2695,6 +2699,7 @@ x86_decode_0f3a( case X86EMUL_OPC_66(0, 0x22): /* pinsr{d,q} */ case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */ + case X86EMUL_OPC_EVEX_66(0, 0x22): /* vpinsr{d,q} */ state->desc = DstImplicit | SrcMem; break; @@ -7735,6 +7740,23 @@ x86_emulate( ea.type = OP_MEM; goto simd_0f_int_imm8; + case X86EMUL_OPC_EVEX_66(0x0f, 0xc4): /* vpinsrw $imm8,r32/m16,xmm,xmm */ + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */ + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */ + generate_exception_if(evex.lr || evex.opmsk || evex.brs, EXC_UD); + if ( b & 2 ) + host_and_vcpu_must_have(avx512dq); + else + host_and_vcpu_must_have(avx512bw); + if ( !mode_64bit() ) + evex.w = 0; + memcpy(mmvalp, &src.val, op_bytes); + ea.type = OP_MEM; + op_bytes = src.bytes; + d = SrcMem16; /* Fake for the common SIMD code below. 
*/ + state->simd_size = simd_other; + goto avx512f_imm8_no_sae; + CASE_SIMD_PACKED_INT(0x0f, 0xc5): /* pextrw $imm8,{,x}mm,reg */ case X86EMUL_OPC_VEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ generate_exception_if(vex.l, EXC_UD); @@ -8951,8 +8973,12 @@ x86_emulate( opc = init_evex(stub); goto pextr; + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x18): /* vinsertf32x4 $imm8,xmm/m128,{y,z}mm{k} */ + /* vinsertf64x2 $imm8,xmm/m128,{y,z}mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x19): /* vextractf32x4 $imm8,{y,z}mm,xmm/m128{k} */ /* vextractf64x2 $imm8,{y,z}mm,xmm/m128{k} */ + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x38): /* vinserti32x4 $imm8,xmm/m128,{y,z}mm{k} */ + /* vinserti64x2 $imm8,xmm/m128,{y,z}mm{k} */ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x39): /* vextracti32x4 $imm8,{y,z}mm,xmm/m128{k} */ /* vextracti64x2 $imm8,{y,z}mm,xmm/m128{k} */ if ( evex.w ) @@ -8961,8 +8987,12 @@ x86_emulate( fault_suppression = false; goto avx512f_imm8_no_sae; + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1a): /* vinsertf32x8 $imm8,ymm/m256,zmm{k} */ + /* vinsertf64x4 $imm8,ymm/m256,zmm{k} */ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1b): /* vextractf32x8 $imm8,zmm,ymm/m256{k} */ /* vextractf64x4 $imm8,zmm,ymm/m256{k} */ + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3a): /* vinserti32x8 $imm8,ymm/m256,zmm{k} */ + /* vinserti64x4 $imm8,ymm/m256,zmm{k} */ case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3b): /* vextracti32x8 $imm8,zmm,ymm/m256{k} */ /* vextracti64x4 $imm8,zmm,ymm/m256{k} */ if ( !evex.w ) @@ -9055,13 +9085,20 @@ x86_emulate( op_bytes = 4; goto simd_0f3a_common; - case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m128,xmm,xmm */ + case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */ op_bytes = 4; /* fall through */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ generate_exception_if(vex.l, EXC_UD); goto simd_0f_imm8_avx; + case X86EMUL_OPC_EVEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m32,xmm,xmm */ + host_and_vcpu_must_have(avx512f); + 
generate_exception_if(evex.lr || evex.w || evex.opmsk || evex.brs, + EXC_UD); + op_bytes = 4; + goto simd_imm8_zmm; + case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */ if ( !vex.w )