From patchwork Mon Jan 6 02:43:00 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chao Liu X-Patchwork-Id: 13926894 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id EF901E77197 for ; Mon, 6 Jan 2025 02:44:59 +0000 (UTC) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1tUd60-0002GB-Bw; Sun, 05 Jan 2025 21:44:00 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1tUd5z-0002Fe-6Q; Sun, 05 Jan 2025 21:43:59 -0500 Received: from out28-76.mail.aliyun.com ([115.124.28.76]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1tUd5t-0005K5-3L; Sun, 05 Jan 2025 21:43:58 -0500 Received: from TC-DZ-03-0020.tc.local(mailfrom:lc00631@tecorigin.com fp:SMTPD_---.awUx1.M_1736131416 cluster:ay29) by smtp.aliyun-inc.com; Mon, 06 Jan 2025 10:43:37 +0800 From: Chao Liu To: alistair.francis@wdc.com, dbarboza@ventanamicro.com, bmeng.cn@gmail.com, palmer@dabbelt.com, liwei1518@gmail.com, zhiwei_liu@linux.alibaba.com Cc: qemu-riscv@nongnu.org, qemu-devel@nongnu.org, zqz00548@tecorigin.com, Chao Liu Subject: [PATCH v3 1/2] target/riscv: refactor VSTART_CHECK_EARLY_EXIT() to accept vl as a parameter Date: Mon, 6 Jan 2025 10:43:00 +0800 Message-ID: X-Mailer: git-send-email 2.47.1 In-Reply-To: References: MIME-Version: 1.0 Received-SPF: pass client-ip=115.124.28.76; envelope-from=lc00631@tecorigin.com; helo=out28-76.mail.aliyun.com X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) 
BAYES_00=-1.9, RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, UNPARSEABLE_RELAY=0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Some vector instructions are special, such as the vlm.v instruction, where setting its vl actually sets evl = (vl + 7) >> 3. To improve maintainability, we will uniformly use VSTART_CHECK_EARLY_EXIT() to check for the condition vstart >= vl. This function will also handle cases involving evl. Fixes: df4252b2ec ("target/riscv/vector_helpers: do early exit when vstart >= vl") Signed-off-by: Chao Liu --- target/riscv/vcrypto_helper.c | 32 +++++++-------- target/riscv/vector_helper.c | 69 ++++++++++++++++----------------- target/riscv/vector_internals.c | 4 +- target/riscv/vector_internals.h | 12 +++--- 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index f7423df226..1526de96f5 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -222,7 +222,7 @@ static inline void xor_round_key(AESState *round_state, AESState *round_key) uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ uint32_t vta = vext_vta(desc); \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ AESState round_key; \ @@ -248,7 +248,7 @@ static inline void xor_round_key(AESState *round_state, AESState *round_key) uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ uint32_t vta = vext_vta(desc); \ \ - 
VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ AESState round_key; \ @@ -309,7 +309,7 @@ void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, uint32_t total_elems = vext_get_total_elems(env, desc, 4); uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); uimm &= 0b1111; if (uimm > 10 || uimm == 0) { @@ -357,7 +357,7 @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, uint32_t total_elems = vext_get_total_elems(env, desc, 4); uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); uimm &= 0b1111; if (uimm > 14 || uimm < 2) { @@ -465,7 +465,7 @@ void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, uint32_t total_elems; uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { if (sew == MO_32) { @@ -582,7 +582,7 @@ void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, uint32_t total_elems; uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, @@ -602,7 +602,7 @@ void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, uint32_t total_elems; uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, @@ -622,7 +622,7 @@ void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, uint32_t total_elems; uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < 
env->vl / 4; i++) { vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, @@ -642,7 +642,7 @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, uint32_t total_elems; uint32_t vta = vext_vta(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, @@ -676,7 +676,7 @@ void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, uint32_t *vs1 = vs1_vptr; uint32_t *vs2 = vs2_vptr; - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (int i = env->vstart / 8; i < env->vl / 8; i++) { uint32_t w[24]; @@ -777,7 +777,7 @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, uint32_t *vs2 = vs2_vptr; uint32_t v1[8], v2[8], v3[8]; - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (int i = env->vstart / 8; i < env->vl / 8; i++) { for (int k = 0; k < 8; k++) { @@ -802,7 +802,7 @@ void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, uint32_t vta = vext_vta(desc); uint32_t total_elems = vext_get_total_elems(env, desc, 4); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]}; @@ -841,7 +841,7 @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, uint32_t vta = vext_vta(desc); uint32_t total_elems = vext_get_total_elems(env, desc, 4); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; @@ -879,7 +879,7 @@ void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env, uint32_t esz = sizeof(uint32_t); uint32_t total_elems = vext_get_total_elems(env, desc, esz); - VSTART_CHECK_EARLY_EXIT(env); + 
VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = group_start; i < group_end; ++i) { uint32_t vstart = i * egs; @@ -937,7 +937,7 @@ void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) uint32_t esz = sizeof(uint32_t); uint32_t total_elems = vext_get_total_elems(env, desc, esz); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = group_start; i < group_end; ++i) { uint32_t vstart = i * egs; @@ -973,7 +973,7 @@ void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) uint32_t esz = sizeof(uint32_t); uint32_t total_elems = vext_get_total_elems(env, desc, esz); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (uint32_t i = group_start; i < group_end; ++i) { uint32_t vstart = i * egs; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index a85dd1d200..4f14395808 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -265,7 +265,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, uint32_t esz = 1 << log2_esz; uint32_t vma = vext_vma(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); for (i = env->vstart; i < env->vl; env->vstart = ++i) { k = 0; @@ -388,10 +388,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t msize = nf * esz; int mmu_index = riscv_env_mmu_index(env, false); - if (env->vstart >= evl) { - env->vstart = 0; - return; - } + VSTART_CHECK_EARLY_EXIT(env, evl); /* Calculate the page range of first page */ addr = base + ((env->vstart * nf) << log2_esz); @@ -533,7 +530,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t vma = vext_vma(desc); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; env->vstart = ++i) { @@ -625,7 +622,7 @@ vext_ldff(void *vd, void *v0, 
target_ulong base, CPURISCVState *env, target_ulong addr, offset, remain, page_split, elems; int mmu_index = riscv_env_mmu_index(env, false); - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, env->vl); /* probe every access */ for (i = env->vstart; i < env->vl; i++) { @@ -1092,7 +1089,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ @@ -1126,7 +1123,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -1163,7 +1160,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ @@ -1203,7 +1200,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -1301,7 +1298,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -1350,7 +1347,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ 
-1414,7 +1411,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ @@ -1481,7 +1478,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -2030,7 +2027,7 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ @@ -2056,7 +2053,7 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ @@ -2081,7 +2078,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ @@ -2107,7 +2104,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -2154,7 +2151,7 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -2281,7 +2278,7 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -3080,7 +3077,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -3125,7 +3122,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -3713,7 +3710,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ if (vl == 0) { \ return; \ @@ -4236,7 +4233,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ @@ -4278,7 +4275,7 @@ 
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -4473,7 +4470,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ @@ -4799,7 +4796,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t i; \ int a, b; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ a = vext_elem_mask(vs1, i); \ @@ -4994,7 +4991,7 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ uint32_t vma = vext_vma(desc); \ int i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5031,7 +5028,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vma = vext_vma(desc); \ target_ulong offset = s1, i_min, i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ i_min = MAX(env->vstart, offset); \ for (i = i_min; i < vl; i++) { \ @@ -5066,7 +5063,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vma = vext_vma(desc); \ target_ulong i_max, i_min, i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ i_min = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ i_max = MAX(i_min, env->vstart); \ @@ -5110,7 +5107,7 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5161,7 +5158,7 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5238,7 +5235,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint64_t index; \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5283,7 +5280,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint64_t index = s1; \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -5379,7 +5376,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c index 05b2d01e58..b490b1d398 100644 --- a/target/riscv/vector_internals.c +++ b/target/riscv/vector_internals.c @@ -66,7 +66,7 @@ void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, uint32_t vma = vext_vma(desc); uint32_t i; - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -92,7 +92,7 @@ void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 
uint32_t vma = vext_vma(desc); uint32_t i; - VSTART_CHECK_EARLY_EXIT(env); + VSTART_CHECK_EARLY_EXIT(env, vl); for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h index a11cc8366d..8eee7e5c31 100644 --- a/target/riscv/vector_internals.h +++ b/target/riscv/vector_internals.h @@ -25,11 +25,11 @@ #include "tcg/tcg-gvec-desc.h" #include "internals.h" -#define VSTART_CHECK_EARLY_EXIT(env) do { \ - if (env->vstart >= env->vl) { \ - env->vstart = 0; \ - return; \ - } \ +#define VSTART_CHECK_EARLY_EXIT(env, vl) do { \ + if (env->vstart >= vl) { \ + env->vstart = 0; \ + return; \ + } \ } while (0) static inline uint32_t vext_nf(uint32_t desc) @@ -159,7 +159,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ - VSTART_CHECK_EARLY_EXIT(env); \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ From patchwork Mon Jan 6 02:43:01 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chao Liu X-Patchwork-Id: 13926895 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 00FC8E77197 for ; Mon, 6 Jan 2025 02:45:14 +0000 (UTC) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1tUd61-0002GU-6i; Sun, 05 Jan 2025 21:44:01 -0500 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1tUd5v-0002Eg-Sl; Sun, 05 Jan 2025 21:43:55 -0500 Received: from 
out28-170.mail.aliyun.com ([115.124.28.170]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1tUd5t-0005K4-5p; Sun, 05 Jan 2025 21:43:55 -0500 Received: from TC-DZ-03-0020.tc.local(mailfrom:lc00631@tecorigin.com fp:SMTPD_---.awUx10-_1736131417 cluster:ay29) by smtp.aliyun-inc.com; Mon, 06 Jan 2025 10:43:37 +0800 From: Chao Liu To: alistair.francis@wdc.com, dbarboza@ventanamicro.com, bmeng.cn@gmail.com, palmer@dabbelt.com, liwei1518@gmail.com, zhiwei_liu@linux.alibaba.com Cc: qemu-riscv@nongnu.org, qemu-devel@nongnu.org, zqz00548@tecorigin.com, Chao Liu Subject: [PATCH v3 2/2] target/riscv: fix handling of nop for vstart >= vl in some vector instruction Date: Mon, 6 Jan 2025 10:43:01 +0800 Message-ID: X-Mailer: git-send-email 2.47.1 In-Reply-To: References: MIME-Version: 1.0 Received-SPF: pass client-ip=115.124.28.170; envelope-from=lc00631@tecorigin.com; helo=out28-170.mail.aliyun.com X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H2=-0.001, RCVD_IN_VALIDITY_RPBL_BLOCKED=0.001, RCVD_IN_VALIDITY_SAFE_BLOCKED=0.001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, UNPARSEABLE_RELAY=0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Recently, while writing a RISC-V test, I found that an instruction executed with vl set to 0 should behave as a nop, but in my test QEMU instead treated all elements as tail elements and, in the case of VTA=1, wrote all 1s to every element.
After troubleshooting, I found that vext_vx_rm_2 calls vext_vx_rm_1 and then calls vext_set_elems_1s to process the tail elements, but the vstart >= vl check was performed only inside vext_vx_rm_1. The early return therefore only left vext_vx_rm_1, and vext_vx_rm_2 still went on to process the tail elements. So I've made the following change: Put VSTART_CHECK_EARLY_EXIT(env, vl) at the beginning of the vext_vx_rm_2 function (and likewise vext_vv_rm_2) instead of in the *_rm_1 helpers, so that the vstart >= vl early exit also skips the tail handling. The same check is added to the other helpers touched below that were missing it. Fixes: df4252b2ec ("target/riscv/vector_helpers: do early exit when vstart >= vl") Signed-off-by: Chao Liu --- target/riscv/vector_helper.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 4f14395808..5f1fc24d99 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -2151,8 +2151,6 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env, vl); - for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -2176,6 +2174,8 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, uint32_t vta = vext_vta(desc); uint32_t vma = vext_vma(desc); + VSTART_CHECK_EARLY_EXIT(env, vl); + switch (env->vxrm) { case 0: /* rnu */ vext_vv_rm_1(vd, v0, vs1, vs2, @@ -2278,8 +2278,6 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, uint32_t vl, uint32_t vm, int vxrm, opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) { - VSTART_CHECK_EARLY_EXIT(env, vl); - for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -2303,6 +2301,8 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, uint32_t vta = vext_vta(desc); uint32_t vma = vext_vma(desc); + VSTART_CHECK_EARLY_EXIT(env, vl); + switch (env->vxrm) { case 0: /* rnu */ 
vext_vx_rm_1(vd, v0, s1, vs2, @@ -4638,6 +4638,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i = env->vstart; i < vl; i++) { \ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -4724,6 +4726,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i = env->vstart; i < vl; i++) { \ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ @@ -4886,6 +4890,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, int i; bool first_mask_bit = false; + VSTART_CHECK_EARLY_EXIT(env, vl); + for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { /* set masked-off elements to 1s */ @@ -4958,6 +4964,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ uint32_t sum = 0; \ int i; \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ /* set masked-off elements to 1s */ \ @@ -5316,6 +5324,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vta = vext_vta(desc); \ uint32_t num = 0, i; \ \ + VSTART_CHECK_EARLY_EXIT(env, vl); \ + \ for (i = env->vstart; i < vl; i++) { \ if (!vext_elem_mask(vs1, i)) { \ continue; \