[v2,07/17] RISC-V: add vector extension atomic instructions

Message ID	1568183141-67641-8-git-send-email-zhiwei_liu@c-sky.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=B9BE=XG=nongnu.org=qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org CA85A207FC From: liuzhiwei <zhiwei_liu@c-sky.com> To: Alistair.Francis@wdc.com, palmer@sifive.com, sagark@eecs.berkeley.edu, kbastian@mail.uni-paderborn.de, riku.voipio@iki.fi, laurent@vivier.eu, wenmeng_zhang@c-sky.com Date: Wed, 11 Sep 2019 14:25:31 +0800 Message-Id: <1568183141-67641-8-git-send-email-zhiwei_liu@c-sky.com> In-Reply-To: <1568183141-67641-1-git-send-email-zhiwei_liu@c-sky.com> References: <1568183141-67641-1-git-send-email-zhiwei_liu@c-sky.com> Subject: [Qemu-devel] [PATCH v2 07/17] RISC-V: add vector extension atomic instructions Precedence: list Cc: qemu-riscv@nongnu.org, qemu-devel@nongnu.org, wxy194768@alibaba-inc.com, LIU Zhiwei <zhiwei_liu@c-sky.com> Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org>
Series	RISC-V: support vector extension \| expand [v2,00/17] RISC-V: support vector extension [v2,01/17] RISC-V: add vfp field in CPURISCVState [v2,02/17] RISC-V: turn on vector extension from command line by cfg.ext_v Property [v2,03/17] RISC-V: support vector extension csr [v2,04/17] RISC-V: add vector extension configure instruction [v2,05/17] RISC-V: add vector extension load and store instructions [v2,06/17] RISC-V: add vector extension fault-only-first implementation [v2,07/17] RISC-V: add vector extension atomic instructions [v2,08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc [v2,09/17] RISC-V: add vector extension integer instructions part2, bit/shift [v2,10/17] RISC-V: add vector extension integer instructions part3, cmp/min/max [v2,11/17] RISC-V: add vector extension integer instructions part4, mul/div/merge [v2,12/17] RISC-V: add vector extension fixed point instructions [v2,13/17] RISC-V: add vector extension float instruction part1, add/sub/mul/div [v2,14/17] RISC-V: add vector extension float instructions part2, sqrt/cmp/cvt/others [v2,15/17] RISC-V: add vector extension reduction instructions [v2,16/17] RISC-V: add vector extension mask instructions [v2,17/17] RISC-V: add vector extension permutation instructions

diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 973342f..c107925 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -121,5 +121,23 @@ DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32) DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32) DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32) DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32) DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32) DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index b286997..48e7661 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -63,6 +63,7 @@ @r_rm ....... ..... ..... ... ..... 
....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... %rs1 %rd +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd @r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd @r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -258,6 +259,26 @@ vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm +#*** Vector AMO operations are encoded under the standard AMO major opcode.*** +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm + #*** new major opcode OP-V *** vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index bd83885..7bda378 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -47,6 +47,23 @@ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ return true; \ } +#define GEN_VECTOR_R_WDVM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 wd = tcg_const_i32(a->wd); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(wd); \ + tcg_temp_free_i32(vm); \ + return true; \ +} + #define GEN_VECTOR_R(INSN) \ static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ { \ @@ -119,5 +136,24 @@ GEN_VECTOR_R_NFVM(vsuxh_v) GEN_VECTOR_R_NFVM(vsuxw_v) GEN_VECTOR_R_NFVM(vsuxe_v) +GEN_VECTOR_R_WDVM(vamoswapw_v) +GEN_VECTOR_R_WDVM(vamoswapd_v) +GEN_VECTOR_R_WDVM(vamoaddw_v) +GEN_VECTOR_R_WDVM(vamoaddd_v) +GEN_VECTOR_R_WDVM(vamoxorw_v) +GEN_VECTOR_R_WDVM(vamoxord_v) +GEN_VECTOR_R_WDVM(vamoandw_v) +GEN_VECTOR_R_WDVM(vamoandd_v) +GEN_VECTOR_R_WDVM(vamoorw_v) +GEN_VECTOR_R_WDVM(vamoord_v) +GEN_VECTOR_R_WDVM(vamominw_v) +GEN_VECTOR_R_WDVM(vamomind_v) +GEN_VECTOR_R_WDVM(vamomaxw_v) +GEN_VECTOR_R_WDVM(vamomaxd_v) +GEN_VECTOR_R_WDVM(vamominuw_v) +GEN_VECTOR_R_WDVM(vamominud_v) +GEN_VECTOR_R_WDVM(vamomaxuw_v) +GEN_VECTOR_R_WDVM(vamomaxud_v) + GEN_VECTOR_R2_ZIMM(vsetvli) GEN_VECTOR_R(vsetvl) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 0ac8c74..9ebf70d 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -136,6 +136,21 @@ static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul, return true; } +static void vector_tail_amo(CPURISCVState *env, int vreg, int 
index, int width) +{ + switch (width) { + case 32: + env->vfp.vreg[vreg].u32[index] = 0; + break; + case 64: + env->vfp.vreg[vreg].u64[index] = 0; + break; + default: + helper_raise_exception(env, RISCV_EXCP_ILLEGAL_INST); + return; + } +} + static void vector_tail_segment(CPURISCVState *env, int vreg, int index, int width, int nf, int lmul) { @@ -3329,3 +3344,1455 @@ void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, env->vfp.vl = vl; env->vfp.vstart = 0; } + +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_xchgl_le(env, addr, + 
env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, 
lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_xchgq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_xchgq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_addl_le(env, addr, + env->vfp.vreg[src3].s32[j], + 
make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_addl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + 
continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_addq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_addq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + 
env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_xorl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_xorl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is written the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 =
vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_xorq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_xorq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_andl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_andl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + 
vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_andq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_andq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + 
j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_orl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_orl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + 
(vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_orq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_orq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + 
vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_sminl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_sminl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* SOFTMMU builds also pass a memop/mmu-index argument, with MO_SIGN cleared from memop. */ +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_sminq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_sminq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; /* NOTE(review): only advanced for elements that pass vector_elem_mask() - confirm this is intended */ + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + /* vamomaxw_v: vector AMO helper built on helper_atomic_fetch_smaxl_le. Each processed element uses address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. SEW below 32 or above the bit width of target_ulong raises an illegal-instruction exception. */ +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_smaxl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_smaxl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; /* NOTE(review): only advanced for elements that pass vector_elem_mask() - confirm this is intended */ + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* The 32-bit helper result is cast through int32_t, so it is sign-extended into the 64-bit element. */ +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + /* vamomaxd_v: vector AMO helper built on helper_atomic_fetch_smaxq_le. Each processed element uses address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. SEW below 64 or above the bit width of target_ulong raises an illegal-instruction exception. */ +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); +
vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* SOFTMMU builds also pass a memop/mmu-index argument, with MO_SIGN cleared from memop. */ +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_smaxq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_smaxq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; /* NOTE(review): only advanced for elements that pass vector_elem_mask() - confirm this is intended */ + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + /* vamominuw_v: vector AMO helper built on helper_atomic_fetch_uminl_le. Each processed element uses address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. SEW below 32 or above the bit width of target_ulong raises an illegal-instruction exception. */ +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /*
if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_uminl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_uminl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* The 32-bit helper result is cast through int32_t and int64_t before landing in the unsigned tmp, i.e. it is sign-extended. */ +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( + env, addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( + env, addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + /* vamominud_v: vector AMO helper that uses helper_atomic_fetch_uminq_le for 64-bit elements, with address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. */ +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp
memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_uminl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_uminl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_uminq_le( + env, addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_uminq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void 
VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ /* vamomaxuw_v: vector AMO helper built on helper_atomic_fetch_umaxl_le. Each processed element uses address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. SEW below 32 or above the bit width of target_ulong raises an illegal-instruction exception. */ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_umaxl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_umaxl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; /* NOTE(review): only advanced for elements that pass vector_elem_mask() - confirm this is intended */ + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* The 32-bit helper result is cast through int32_t and int64_t before landing in the unsigned tmp, i.e. it is sign-extended. */ +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( + env, addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp =
(int64_t)(int32_t)helper_atomic_fetch_umaxl_le( + env, addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ /* vamomaxud_v: vector AMO helper built on helper_atomic_fetch_umaxq_le. Each processed element uses address gpr[rs1] + vs2[i] and operand vs3[i]; when wd is non-zero the value returned by the helper is stored back into vs3[i]. SEW below 64 or above the bit width of target_ulong raises an illegal-instruction exception. */ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd is set, vs3 is written with the old memory value, so the overlap check only applies then */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + /* Elements below vstart are skipped, elements below vl are processed, the rest go to vector_tail_amo(). */ + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + uint64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + /* SOFTMMU builds also pass a memop/mmu-index argument, with MO_SIGN cleared from memop. */ +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_umaxq_le( + env, addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_umaxq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; /* NOTE(review): only advanced for elements that pass vector_elem_mask() - confirm this is intended */ + } + break; + default: +
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} +

[v2,07/17] RISC-V: add vector extension atomic instructions

Commit Message

Comments

Patch