Message ID | 20200317150653.9008-11-zhiwei_liu@c-sky.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/riscv: support vector extension v0.7.1 | expand |
On Tue, Mar 17, 2020 at 8:27 AM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote: > > Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Alistair > --- > target/riscv/helper.h | 21 ++ > target/riscv/insn32.decode | 10 + > target/riscv/insn_trans/trans_rvv.inc.c | 251 ++++++++++++++++++++++++ > target/riscv/vector_helper.c | 149 ++++++++++++++ > 4 files changed, 431 insertions(+) > > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index 70a4b05f75..e73701d4bb 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -269,3 +269,24 @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) > + > +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) > 
+DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index 1330703720..d1034a0e61 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -44,6 +44,7 @@ > &u imm rd > &shift shamt rs1 rd > &atomic aq rl rs2 rs1 rd > +&rmrr vm rd rs1 rs2 > &rwdvm vm wd rd rs1 rs2 > &r2nfvm vm rd rs1 nf > &rnfvm vm rd rs1 rs2 nf > @@ -68,6 +69,7 @@ > @r2 ....... ..... ..... ... ..... ....... %rs1 %rd > @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd > @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd > +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd > @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd > @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd > > @@ -275,5 +277,13 @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm > vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm > > # *** new major opcode OP-V *** > +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm > +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm > +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm > +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm > +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm > +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm > +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm > + > vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm > vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r > diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c > index a8722ed9d2..c68f6ffe3b 100644 > --- a/target/riscv/insn_trans/trans_rvv.inc.c > +++ b/target/riscv/insn_trans/trans_rvv.inc.c > @@ -740,3 +740,254 @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) > GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) > GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) > #endif > + > +/* > + *** Vector Integer Arithmetic Instructions > + */ > +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) > + > +static bool opivv_check(DisasContext *s, arg_rmrr *a) > +{ > + return (vext_check_isa_ill(s) && > + vext_check_overlap_mask(s, a->rd, a->vm, false) && > + vext_check_reg(s, a->rd, false) && > + vext_check_reg(s, a->rs2, false) && > + vext_check_reg(s, a->rs1, false)); > +} > + > +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, > + uint32_t, uint32_t, uint32_t); > + > +static inline bool > +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, > + gen_helper_gvec_4_ptr *fn) > +{ > + if (!opivv_check(s, a)) { > + return false; > + } > + > + if (a->vm && s->vl_eq_vlmax) { > + gvec_fn(s->sew, vreg_ofs(s, a->rd), > + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), > + MAXSZ(s), MAXSZ(s)); > + } else { > + uint32_t data = 0; > + > + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); > + data = FIELD_DP32(data, VDATA, VM, a->vm); > + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); > + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), > + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), > + cpu_env, 0, s->vlen / 8, data, fn); > + } > + return true; > +} > + > +/* OPIVV with GVEC IR */ > +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + static gen_helper_gvec_4_ptr * const fns[4] = { \ > + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ > + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ > + }; \ > + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, 
fns[s->sew]); \ > +} > + > +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) > +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) > + > +typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, > + TCGv_env, TCGv_i32); > + > +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, > + gen_helper_opivx *fn, DisasContext *s) > +{ > + TCGv_ptr dest, src2, mask; > + TCGv src1; > + TCGv_i32 desc; > + uint32_t data = 0; > + > + dest = tcg_temp_new_ptr(); > + mask = tcg_temp_new_ptr(); > + src2 = tcg_temp_new_ptr(); > + src1 = tcg_temp_new(); > + gen_get_gpr(src1, rs1); > + > + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); > + data = FIELD_DP32(data, VDATA, VM, vm); > + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); > + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); > + > + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); > + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); > + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); > + > + fn(dest, mask, src1, src2, cpu_env, desc); > + > + tcg_temp_free_ptr(dest); > + tcg_temp_free_ptr(mask); > + tcg_temp_free_ptr(src2); > + tcg_temp_free(src1); > + tcg_temp_free_i32(desc); > + return true; > +} > + > +static bool opivx_check(DisasContext *s, arg_rmrr *a) > +{ > + return (vext_check_isa_ill(s) && > + vext_check_overlap_mask(s, a->rd, a->vm, false) && > + vext_check_reg(s, a->rd, false) && > + vext_check_reg(s, a->rs2, false)); > +} > + > +typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64, > + uint32_t, uint32_t); > + > +static inline bool > +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, > + gen_helper_opivx *fn) > +{ > + if (!opivx_check(s, a)) { > + return false; > + } > + > + if (a->vm && s->vl_eq_vlmax) { > + TCGv_i64 src1 = tcg_temp_new_i64(); > + TCGv tmp = tcg_temp_new(); > + > + gen_get_gpr(tmp, a->rs1); > + tcg_gen_ext_tl_i64(src1, tmp); > + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), > + src1, MAXSZ(s), MAXSZ(s)); > + > + tcg_temp_free_i64(src1); > + 
tcg_temp_free(tmp); > + return true; > + } else { > + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); > + } > + return true; > +} > + > +/* OPIVX with GVEC IR */ > +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + static gen_helper_opivx * const fns[4] = { \ > + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ > + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ > + }; \ > + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ > +} > + > +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) > +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) > + > +/* OPIVX without GVEC IR */ > +#define GEN_OPIVX_TRANS(NAME, CHECK) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + if (CHECK(s, a)) { \ > + static gen_helper_opivx * const fns[4] = { \ > + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ > + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ > + }; \ > + \ > + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ > + } \ > + return false; \ > +} > + > +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) > + > +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, > + gen_helper_opivx *fn, DisasContext *s, int zx) > +{ > + TCGv_ptr dest, src2, mask; > + TCGv src1; > + TCGv_i32 desc; > + uint32_t data = 0; > + > + dest = tcg_temp_new_ptr(); > + mask = tcg_temp_new_ptr(); > + src2 = tcg_temp_new_ptr(); > + if (zx) { > + src1 = tcg_const_tl(imm); > + } else { > + src1 = tcg_const_tl(sextract64(imm, 0, 5)); > + } > + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); > + data = FIELD_DP32(data, VDATA, VM, vm); > + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); > + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); > + > + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); > + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); > + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); > + > + fn(dest, mask, src1, src2, cpu_env, desc); > + > + tcg_temp_free_ptr(dest); > + 
tcg_temp_free_ptr(mask); > + tcg_temp_free_ptr(src2); > + tcg_temp_free(src1); > + tcg_temp_free_i32(desc); > + return true; > +} > + > +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, > + uint32_t, uint32_t); > + > +static inline bool > +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, > + gen_helper_opivx *fn, int zx) > +{ > + if (!opivx_check(s, a)) { > + return false; > + } > + > + if (a->vm && s->vl_eq_vlmax) { > + if (zx) { > + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), > + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); > + } else { > + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), > + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); > + } > + } else { > + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); > + } > + return true; > +} > + > +/* OPIVI with GVEC IR */ > +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + static gen_helper_opivx * const fns[4] = { \ > + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ > + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ > + }; \ > + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ > + fns[s->sew], ZX); \ > +} > + > +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) > + > +/* OPIVI without GVEC IR */ > +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + if (CHECK(s, a)) { \ > + static gen_helper_opivx * const fns[4] = { \ > + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ > + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ > + }; \ > + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ > + fns[s->sew], s, ZX); \ > + } \ > + return false; \ > +} > + > +GEN_OPIVI_TRANS(vrsub_vi, 0, vrsub_vx, opivx_check) > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 45da43ade9..27934e291b 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -827,3 
+827,152 @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) > GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) > GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) > GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) > + > +/* > + *** Vector Integer Arithmetic Instructions > + */ > + > +/* expand macro args before macro */ > +#define RVVCALL(macro, ...) macro(__VA_ARGS__) > + > +/* (TD, T1, T2, TX1, TX2) */ > +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t > +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t > +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t > +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t > + > +/* operation of two vector elements */ > +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); > + > +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ > +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ > +{ \ > + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ > + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ > + *((TD *)vd + HD(i)) = OP(s2, s1); \ > +} > +#define DO_SUB(N, M) (N - M) > +#define DO_RSUB(N, M) (M - N) > + > +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) > +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) > +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) > +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) > +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) > +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) > +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) > +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) > + > +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, > + CPURISCVState *env, uint32_t desc, > + uint32_t esz, uint32_t dsz, > + opivv2_fn *fn, clear_fn *clearfn) > +{ > + uint32_t vlmax = vext_maxsz(desc) / esz; > + uint32_t mlen = vext_mlen(desc); > + uint32_t vm = vext_vm(desc); > + uint32_t vl = env->vl; > + uint32_t i; > + > + if (vl == 0) 
{ > + return; > + } > + for (i = 0; i < vl; i++) { > + if (!vm && !vext_elem_mask(v0, mlen, i)) { > + continue; > + } > + fn(vd, vs1, vs2, i); > + } > + clearfn(vd, vl, vl * dsz, vlmax * dsz); > +} > + > +/* generate the helpers for OPIVV */ > +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ > +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ > + void *vs2, CPURISCVState *env, \ > + uint32_t desc) \ > +{ \ > + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ > + do_##NAME, CLEAR_FN); \ > +} > + > +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) > +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) > +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) > +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) > +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) > +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) > +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) > +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) > + > +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); > + > +/* > + * (T1)s1 gives the real operator type. > + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
> + */ > +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ > +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ > +{ \ > + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ > + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ > +} > + > +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) > +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) > +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) > +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) > +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) > +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) > +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) > +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) > +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) > +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) > +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) > +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) > + > +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, > + CPURISCVState *env, uint32_t desc, > + uint32_t esz, uint32_t dsz, > + opivx2_fn fn, clear_fn *clearfn) > +{ > + uint32_t vlmax = vext_maxsz(desc) / esz; > + uint32_t mlen = vext_mlen(desc); > + uint32_t vm = vext_vm(desc); > + uint32_t vl = env->vl; > + uint32_t i; > + > + if (vl == 0) { > + return; > + } > + for (i = 0; i < vl; i++) { > + if (!vm && !vext_elem_mask(v0, mlen, i)) { > + continue; > + } > + fn(vd, s1, vs2, i); > + } > + clearfn(vd, vl, vl * dsz, vlmax * dsz); > +} > + > +/* generate the helpers for OPIVX */ > +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ > +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ > + void *vs2, CPURISCVState *env, \ > + uint32_t desc) \ > +{ \ > + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ > + do_##NAME, CLEAR_FN); \ > +} > + > +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) > +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) > +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) > +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) > +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) 
> +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) > +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) > +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) > +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) > +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) > +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) > +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) > -- > 2.23.0 >
On 3/17/20 8:06 AM, LIU Zhiwei wrote:
> + if (a->vm && s->vl_eq_vlmax) {
> + gvec_fn(s->sew, vreg_ofs(s, a->rd),
> + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
> + MAXSZ(s), MAXSZ(s));

Indentation is off here.

> +static inline bool
> +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
> + gen_helper_opivx *fn)
> +{
> + if (!opivx_check(s, a)) {
> + return false;
> + }
> +
> + if (a->vm && s->vl_eq_vlmax) {
> + TCGv_i64 src1 = tcg_temp_new_i64();
> + TCGv tmp = tcg_temp_new();
> +
> + gen_get_gpr(tmp, a->rs1);
> + tcg_gen_ext_tl_i64(src1, tmp);
> + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
> + src1, MAXSZ(s), MAXSZ(s));
> +
> + tcg_temp_free_i64(src1);
> + tcg_temp_free(tmp);
> + return true;
> + } else {
> + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
> + }
> + return true;
> +}

This final return is unreachable, and I'm sure some static analyzer (e.g. Coverity) will complain.

Since the if-then has a return, we can drop the else like so:

    if (a->vm && s->vl_eq_vlmax) {
        ...
        return true;
    }
    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
On 2020/3/28 7:54, Richard Henderson wrote:
> On 3/17/20 8:06 AM, LIU Zhiwei wrote:
>> + if (a->vm && s->vl_eq_vlmax) {
>> + gvec_fn(s->sew, vreg_ofs(s, a->rd),
>> + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
>> + MAXSZ(s), MAXSZ(s));
> Indentation is off here.

Do you mean I should adjust the indentation for parameters in gvec_fn like this?

+ if (a->vm && s->vl_eq_vlmax) {
+ gvec_fn(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
+ MAXSZ(s), MAXSZ(s));

>> +static inline bool
>> +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
>> + gen_helper_opivx *fn)
>> +{
>> + if (!opivx_check(s, a)) {
>> + return false;
>> + }
>> +
>> + if (a->vm && s->vl_eq_vlmax) {
>> + TCGv_i64 src1 = tcg_temp_new_i64();
>> + TCGv tmp = tcg_temp_new();
>> +
>> + gen_get_gpr(tmp, a->rs1);
>> + tcg_gen_ext_tl_i64(src1, tmp);
>> + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
>> + src1, MAXSZ(s), MAXSZ(s));
>> +
>> + tcg_temp_free_i64(src1);
>> + tcg_temp_free(tmp);
>> + return true;
>> + } else {
>> + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
>> + }
>> + return true;
>> +}
> This final return is unreachable, and I'm sure some static analyzer (e.g.
> Coverity) will complain.
>
> Since the if-then has a return, we can drop the else like so:
>
> if (a->vm && s->vl_eq_vlmax) {
> ...
> return true;
> }
> return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);

Yes, it's tidier. Thanks.

Zhiwei

>
> Otherwise,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
> r~
diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 70a4b05f75..e73701d4bb 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -269,3 +269,24 @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 1330703720..d1034a0e61 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -44,6 +44,7 @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd +&rmrr vm rd rs1 rs2 &rwdvm vm wd rd rs1 rs2 &r2nfvm vm rd rs1 nf &rnfvm vm rd rs1 rs2 nf @@ -68,6 +69,7 @@ @r2 ....... 
..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -275,5 +277,13 @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm # *** new major opcode OP-V *** +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm + vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index a8722ed9d2..c68f6ffe3b 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -740,3 +740,254 @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) #endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +static inline bool +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, + gen_helper_gvec_4_ptr *fn) +{ + if (!opivv_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + gvec_fn(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = 0; + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + cpu_env, 0, s->vlen / 8, data, fn); + } + return true; +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void 
gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s) +{ + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + src1 = tcg_temp_new(); + gen_get_gpr(src1, rs1); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} + +typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, + gen_helper_opivx *fn) +{ + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i64 src1 = tcg_temp_new_i64(); + TCGv tmp = tcg_temp_new(); + + gen_get_gpr(tmp, a->rs1); + tcg_gen_ext_tl_i64(src1, tmp); + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i64(src1); + tcg_temp_free(tmp); + return true; + } else { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return true; +} + +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ 
\ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s, int zx) +{ + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + if (zx) { + src1 = tcg_const_tl(imm); + } else { + src1 = tcg_const_tl(sextract64(imm, 0, 5)); + } + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + return true; +} + +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); + +static inline bool +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, + gen_helper_opivx *fn, int zx) +{ + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && 
s->vl_eq_vlmax) { + if (zx) { + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } else { + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } + } else { + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); + } + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew], ZX); \ +} + +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_TRANS(vrsub_vi, 0, vrsub_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 45da43ade9..27934e291b 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -827,3 +827,152 @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) 
macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivv2_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + if (vl == 0) { + return; + } + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + 
+GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. + */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivx2_fn fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + if (vl == 0) { + return; + } + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void 
HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  21 ++
 target/riscv/insn32.decode              |  10 +
 target/riscv/insn_trans/trans_rvv.inc.c | 251 ++++++++++++++++++++++++
 target/riscv/vector_helper.c            | 149 ++++++++++++++
 4 files changed, 431 insertions(+)