From patchwork Fri Oct 19 16:57:26 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Maydell X-Patchwork-Id: 10649855 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 183F8109C for ; Fri, 19 Oct 2018 17:11:17 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 01F362847D for ; Fri, 19 Oct 2018 17:11:17 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id EA81228495; Fri, 19 Oct 2018 17:11:16 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 411402847D for ; Fri, 19 Oct 2018 17:11:16 +0000 (UTC) Received: from localhost ([::1]:51761 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gDYIt-0006oU-Cq for patchwork-qemu-devel@patchwork.kernel.org; Fri, 19 Oct 2018 13:11:15 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47691) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gDY6S-0001wg-Cg for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:25 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gDY6Q-0002eV-TG for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:24 -0400 Received: from orth.archaic.org.uk ([2001:8b0:1d0::2]:51980) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1gDY6J-00020Y-Bh for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:17 -0400 Received: from pm215 by orth.archaic.org.uk with local (Exim 4.89) (envelope-from ) id 1gDY6G-0006nl-F4 for qemu-devel@nongnu.org; Fri, 19 Oct 2018 17:58:12 +0100 From: Peter Maydell To: qemu-devel@nongnu.org Date: Fri, 19 Oct 2018 17:57:26 +0100 Message-Id: <20181019165735.22511-37-peter.maydell@linaro.org> X-Mailer: git-send-email 2.19.1 In-Reply-To: <20181019165735.22511-1-peter.maydell@linaro.org> References: <20181019165735.22511-1-peter.maydell@linaro.org> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2001:8b0:1d0::2 Subject: [Qemu-devel] [PULL 36/45] target/arm: Use gvec for NEON_3R_VML X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP From: Richard Henderson Move mla_op and mls_op expanders from translate-a64.c. Signed-off-by: Richard Henderson Message-id: 20181011205206.3552-16-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate.h | 2 + target/arm/translate-a64.c | 106 ----------------------------- target/arm/translate.c | 134 ++++++++++++++++++++++++++++++++----- 3 files changed, 120 insertions(+), 122 deletions(-) diff --git a/target/arm/translate.h b/target/arm/translate.h index 7eb759d0414..1b4703dc6e7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -196,6 +196,8 @@ static inline TCGv_i32 get_ahp_flag(void) extern const GVecGen3 bsl_op; extern const GVecGen3 bit_op; extern const GVecGen3 bif_op; +extern const GVecGen3 mla_op[4]; +extern const GVecGen3 mls_op[4]; extern const GVecGen2i ssra_op[4]; extern const GVecGen2i usra_op[4]; extern const GVecGen2i sri_op[4]; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index c26168d72c8..fd4d8072da2 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10410,66 +10410,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) } } -static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_add_u8(d, d, a); -} - -static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_add_u16(d, d, a); -} - -static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_add_i32(d, d, a); -} - -static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_add_i64(d, d, a); -} - -static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_sub_u8(d, d, a); -} - -static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_sub_u16(d, d, a); -} - -static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_sub_i32(d, d, a); -} - -static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_sub_i64(d, d, a); -} - -static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, d, d, a); -} - /* Integer op subgroup of C3.6.16. */ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) { @@ -10488,52 +10428,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; - static const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - static const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; int is_q = extract32(insn, 30, 1); int u = extract32(insn, 29, 1); diff --git a/target/arm/translate.c b/target/arm/translate.c index ee7294e54f0..d4eb66fadd6 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5468,7 +5468,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VABA 15 #define NEON_3R_VADD_VSUB 16 #define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VML 18 /* VMLA, VMLS */ #define NEON_3R_VMUL 19 #define NEON_3R_VPMAX 20 #define NEON_3R_VPMIN 21 @@ -6032,6 +6032,117 @@ const GVecGen2i sli_op[4] = { .vece = MO_64 }, }; +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. + */ +const GVecGen3 mla_op[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +const GVecGen3 mls_op[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -6233,7 +6344,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 0; } break; + + case NEON_3R_VML: /* VMLA, VMLS */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + u ? &mls_op[size] : &mla_op[size]); + return 0; } + if (size == 3) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 2 : 1); pass++) { @@ -6435,21 +6552,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } } break; - case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; case NEON_3R_VMUL: /* VMUL.P8; other cases already eliminated. */ gen_helper_neon_mul_p8(tmp, tmp, tmp2);