[PULL,14/42] target/arm: Split out gengvec64.c

Message ID	20240528140753.3620597-15-peter.maydell@linaro.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org> From: Peter Maydell <peter.maydell@linaro.org> To: qemu-devel@nongnu.org Subject: [PULL 14/42] target/arm: Split out gengvec64.c Date: Tue, 28 May 2024 15:07:25 +0100 Message-Id: <20240528140753.3620597-15-peter.maydell@linaro.org> In-Reply-To: <20240528140753.3620597-1-peter.maydell@linaro.org> References: <20240528140753.3620597-1-peter.maydell@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Received-SPF: pass client-ip=2a00:1450:4864:20::42f; envelope-from=peter.maydell@linaro.org; helo=mail-wr1-x42f.google.com X-Spam_score_int: -20 X-Spam_score: -2.1 X-Spam_bar: -- X-Spam_report: (-2.1 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001, T_SCC_BODY_TEXT_LINE=-0.01, T_SPF_TEMPERROR=0.01 autolearn=ham autolearn_force=no X-Spam_action: no action Precedence: list Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
Series	[PULL,01/42] xlnx_dpdma: fix descriptor endianness bug \| expand [PULL,01/42] xlnx_dpdma: fix descriptor endianness bug [PULL,02/42] hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers [PULL,03/42] hw/arm/npcm7xx: remove setting of mp-affinity [PULL,04/42] hw/char: Correct STM32L4x5 usart register CR2 field ADD_0 size [PULL,05/42] hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n> [PULL,06/42] hw/input/tsc2005: Fix -Wchar-subscripts warning in tsc2005_txrx() [PULL,07/42] hw: arm: Remove use of tabs in some source files [PULL,08/42] docs/system: Remove ADC from raspi documentation [PULL,09/42] target/arm: Use PLD, PLDW, PLI not NOP for t32 [PULL,10/42] target/arm: Zero-extend writeback for fp16 FCVTZS (scalar, integer) [PULL,11/42] target/arm: Fix decode of FMOV (hp) vs MOVI [PULL,12/42] target/arm: Verify sz=0 for Advanced SIMD scalar pairwise (fp16) [PULL,13/42] target/arm: Split out gengvec.c [PULL,14/42] target/arm: Split out gengvec64.c [PULL,15/42] target/arm: Convert Cryptographic AES to decodetree [PULL,16/42] target/arm: Convert Cryptographic 3-register SHA to decodetree [PULL,17/42] target/arm: Convert Cryptographic 2-register SHA to decodetree [PULL,18/42] target/arm: Convert Cryptographic 3-register SHA512 to decodetree [PULL,19/42] target/arm: Convert Cryptographic 2-register SHA512 to decodetree [PULL,20/42] target/arm: Convert Cryptographic 4-register to decodetree [PULL,21/42] target/arm: Convert Cryptographic 3-register, imm2 to decodetree [PULL,22/42] target/arm: Convert XAR to decodetree [PULL,23/42] target/arm: Convert Advanced SIMD copy to decodetree [PULL,24/42] target/arm: Convert FMULX to decodetree [PULL,25/42] target/arm: Convert FADD, FSUB, FDIV, FMUL to decodetree [PULL,26/42] target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM to decodetree [PULL,27/42] target/arm: Introduce vfp_load_reg16 [PULL,28/42] target/arm: Expand vfp neg and abs inline [PULL,29/42] target/arm: Convert FNMUL to decodetree [PULL,30/42] target/arm: Convert FMLA, FMLS to decodetree [PULL,31/42] target/arm: Convert FCMEQ, FCMGE, FCMGT, FACGE, FACGT to decodetree [PULL,32/42] target/arm: Convert FABD to decodetree [PULL,33/42] target/arm: Convert FRECPS, FRSQRTS to decodetree [PULL,34/42] target/arm: Convert FADDP to decodetree [PULL,35/42] target/arm: Convert FMAXP, FMINP, FMAXNMP, FMINNMP to decodetree [PULL,36/42] target/arm: Use gvec for neon faddp, fmaxp, fminp [PULL,37/42] target/arm: Convert ADDP to decodetree [PULL,38/42] target/arm: Use gvec for neon padd [PULL,39/42] target/arm: Convert SMAXP, SMINP, UMAXP, UMINP to decodetree [PULL,40/42] target/arm: Use gvec for neon pmax, pmin [PULL,41/42] target/arm: Convert FMLAL, FMLSL to decodetree [PULL,42/42] target/arm: Convert disas_simd_3same_logic to decodetree

diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 7b811b8ac51..91750f0ca91 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -193,6 +193,10 @@ void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c new file mode 100644 index 00000000000..093b498b13d --- /dev/null +++ b/target/arm/tcg/gengvec64.c @@ -0,0 +1,190 @@ +/* + * AArch64 generic vector expansion + * + * Copyright (c) 2013 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "translate.h" +#include "translate-a64.h" + + +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); +} + +static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_8, 0xff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 8 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_16, 0xffff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 16 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) +{ + tcg_gen_xor_i32(d, n, m); + tcg_gen_rotri_i32(d, d, sh); +} + +static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_rotri_i64(d, d, sh); +} + +static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, int64_t sh) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_rotri_vec(vece, d, d, sh); +} + +void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, int64_t shift, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3i ops[4] = { + { .fni8 = gen_xar8_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_b, + .opt_opc = vecop, + .vece = MO_8 }, + { .fni8 = gen_xar16_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_h, + .opt_opc = vecop, + .vece = MO_16 }, + { .fni4 = gen_xar_i32, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_s, + .opt_opc = vecop, + .vece = MO_32 }, + { .fni8 = gen_xar_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_gvec_xar_d, + .opt_opc = vecop, + .vece = MO_64 } + }; + int esize = 8 << vece; + + /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift <= esize); + shift &= esize - 1; + + if (shift == 0) { + /* xar with no rotate devolves to xor. */ + tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, + shift, &ops[vece]); + } +} + +static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_xor_i64(d, d, k); +} + +static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_xor_vec(vece, d, d, k); +} + +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_eor3_i64, + .fniv = gen_eor3_vec, + .fno = gen_helper_sve2_eor3, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + +static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_andc_i64(d, m, k); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_andc_vec(vece, d, m, k); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_bcax_i64, + .fniv = gen_bcax_vec, + .fno = gen_helper_sve2_bcax, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 0bdddb8517a..8842ff634d5 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -13623,32 +13623,6 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); } -static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) -{ - tcg_gen_rotli_i64(d, m, 1); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) -{ - tcg_gen_rotli_vec(vece, d, m, 1); - tcg_gen_xor_vec(vece, d, d, n); -} - -void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3 op = { - .fni8 = gen_rax1_i64, - .fniv = gen_rax1_vec, - .opt_opc = vecop_list, - .fno = gen_helper_crypto_rax1, - .vece = MO_64, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); -} - /* Crypto three-reg SHA512 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 * +-----------------------+------+---+---+-----+--------+------+------+ diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index ada05aa5302..798ab2bfb13 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -527,94 +527,6 @@ TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) -static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_8, 0xff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 8 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_16, 0xffff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 16 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) -{ - tcg_gen_xor_i32(d, n, m); - tcg_gen_rotri_i32(d, d, sh); -} - -static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_rotri_i64(d, d, sh); -} - -static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, int64_t sh) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_rotri_vec(vece, d, d, sh); -} - -void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, int64_t shift, - uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3i ops[4] = { - { .fni8 = gen_xar8_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_b, - .opt_opc = vecop, - .vece = MO_8 }, - { .fni8 = gen_xar16_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_h, - .opt_opc = vecop, - .vece = MO_16 }, - { .fni4 = gen_xar_i32, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_s, - .opt_opc = vecop, - .vece = MO_32 }, - { .fni8 = gen_xar_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_gvec_xar_d, - .opt_opc = vecop, - .vece = MO_64 } - }; - int esize = 8 << vece; - - /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ - tcg_debug_assert(shift >= 0); - tcg_debug_assert(shift <= esize); - shift &= esize - 1; - - if (shift == 0) { - /* xar with no rotate devolves to xor. */ - tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); - } else { - tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, - shift, &ops[vece]); - } -} - static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) { if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { @@ -629,61 +541,8 @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) return true; } -static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_xor_i64(d, d, k); -} - -static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_xor_vec(vece, d, d, k); -} - -static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_eor3_i64, - .fniv = gen_eor3_vec, - .fno = gen_helper_sve2_eor3, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) - -static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_andc_i64(d, m, k); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_andc_vec(vece, d, m, k); - tcg_gen_xor_vec(vece, d, d, n); -} - -static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_bcax_i64, - .fniv = gen_bcax_vec, - .fno = gen_helper_sve2_bcax, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) +TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) +TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, uint32_t a, uint32_t oprsz, uint32_t maxsz) diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index bdb5c7352f2..508932a249f 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -43,6 +43,7 @@ arm_ss.add(files( arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', + 'gengvec64.c', 'translate-a64.c', 'translate-sve.c', 'translate-sme.c',

[PULL,14/42] target/arm: Split out gengvec64.c

Commit Message

Patch