Message ID | 20220113014959.21429-4-liweiwei@iscas.ac.cn (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | support subsets of Floating-Point in Integer Registers extensions | expand |
On Thu, Jan 13, 2022 at 11:52 AM Weiwei Li <liweiwei@iscas.ac.cn> wrote: > > - update extension check REQUIRE_ZFINX_OR_F > - update single float point register read/write > - disable nanbox_s check > > Co-authored-by: ardxwe <ardxwe@gmail.com> > Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> > Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/riscv/fpu_helper.c | 89 +++---- > target/riscv/helper.h | 2 +- > target/riscv/insn_trans/trans_rvf.c.inc | 314 ++++++++++++++++-------- > target/riscv/internals.h | 16 +- > target/riscv/translate.c | 90 +++++++ > 5 files changed, 367 insertions(+), 144 deletions(-) > > diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c > index 4a5982d594..63ca703459 100644 > --- a/target/riscv/fpu_helper.c > +++ b/target/riscv/fpu_helper.c > @@ -98,10 +98,11 @@ static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2, > static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2, > uint64_t rs3, int flags) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - float32 frs3 = check_nanbox_s(rs3); > - return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + float32 frs3 = check_nanbox_s(env, rs3); > + return nanbox_s(env, float32_muladd(frs1, frs2, frs3, flags, > + &env->fp_status)); > } > > uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > @@ -183,124 +184,124 @@ uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > > uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + 
return nanbox_s(env, float32_add(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fsub_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + return nanbox_s(env, float32_sub(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmul_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + return nanbox_s(env, float32_mul(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + return nanbox_s(env, float32_div(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? > float32_minnum(frs1, frs2, &env->fp_status) : > float32_minimum_number(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? 
> + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? > float32_maxnum(frs1, frs2, &env->fp_status) : > float32_maximum_number(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > - return nanbox_s(float32_sqrt(frs1, &env->fp_status)); > + float32 frs1 = check_nanbox_s(env, rs1); > + return nanbox_s(env, float32_sqrt(frs1, &env->fp_status)); > } > > target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > return float32_le(frs1, frs2, &env->fp_status); > } > > target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > return float32_lt(frs1, frs2, &env->fp_status); > } > > target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) > { > - float32 frs1 = check_nanbox_s(rs1); > - float32 frs2 = check_nanbox_s(rs2); > + float32 frs1 = check_nanbox_s(env, rs1); > + float32 frs2 = check_nanbox_s(env, rs2); > return float32_eq_quiet(frs1, frs2, &env->fp_status); > } > > target_ulong helper_fcvt_w_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return float32_to_int32(frs1, &env->fp_status); > } > > target_ulong helper_fcvt_wu_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return (int32_t)float32_to_uint32(frs1, &env->fp_status); > } > > target_ulong helper_fcvt_l_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = 
check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return float32_to_int64(frs1, &env->fp_status); > } > > target_ulong helper_fcvt_lu_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return float32_to_uint64(frs1, &env->fp_status); > } > > uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) > { > - return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); > + return nanbox_s(env, int32_to_float32((int32_t)rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) > { > - return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); > + return nanbox_s(env, uint32_to_float32((uint32_t)rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_s_l(CPURISCVState *env, target_ulong rs1) > { > - return nanbox_s(int64_to_float32(rs1, &env->fp_status)); > + return nanbox_s(env, int64_to_float32(rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_s_lu(CPURISCVState *env, target_ulong rs1) > { > - return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); > + return nanbox_s(env, uint64_to_float32(rs1, &env->fp_status)); > } > > -target_ulong helper_fclass_s(uint64_t rs1) > +target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return fclass_s(frs1); > } > > @@ -340,12 +341,12 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > > uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) > { > - return nanbox_s(float64_to_float32(rs1, &env->fp_status)); > + return nanbox_s(env, float64_to_float32(rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return float32_to_float64(frs1, &env->fp_status); > } > > @@ -539,14 +540,14 @@ uint64_t helper_fcvt_h_lu(CPURISCVState *env, 
target_ulong rs1) > > uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1) > { > - float32 frs1 = check_nanbox_s(rs1); > + float32 frs1 = check_nanbox_s(env, rs1); > return nanbox_h(float32_to_float16(frs1, true, &env->fp_status)); > } > > uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1) > { > float16 frs1 = check_nanbox_h(rs1); > - return nanbox_s(float16_to_float32(frs1, true, &env->fp_status)); > + return nanbox_s(env, float16_to_float32(frs1, true, &env->fp_status)); > } > > uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1) > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index 6cf6d6ce98..33f928c76b 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -38,7 +38,7 @@ DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) > DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) > DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) > DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) > -DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) > +DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64) > > /* Floating Point - Double Precision */ > DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) > diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc > index b5459249c4..50c7faf38f 100644 > --- a/target/riscv/insn_trans/trans_rvf.c.inc > +++ b/target/riscv/insn_trans/trans_rvf.c.inc > @@ -20,7 +20,14 @@ > > #define REQUIRE_FPU do {\ > if (ctx->mstatus_fs == 0) \ > - return false; \ > + if (!ctx->ext_zfinx) \ > + return false; \ > +} while (0) > + > +#define REQUIRE_ZFINX_OR_F(ctx) do {\ > + if (!ctx->ext_zfinx) { \ > + REQUIRE_EXT(ctx, RVF); \ > + } \ > } while (0) > > static bool trans_flw(DisasContext *ctx, arg_flw *a) > @@ -70,10 +77,16 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a) > static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + 
REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); > + > gen_set_rm(ctx, a->rm); > - gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); > + gen_helper_fmadd_s(dest, cpu_env, src1, src2, src3); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -81,10 +94,16 @@ static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) > static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); > + > gen_set_rm(ctx, a->rm); > - gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); > + gen_helper_fmsub_s(dest, cpu_env, src1, src2, src3); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -92,10 +111,16 @@ static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) > static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); > + > gen_set_rm(ctx, a->rm); > - gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); > + gen_helper_fnmsub_s(dest, cpu_env, src1, src2, src3); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -103,10 +128,16 @@ static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) > static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) > { > REQUIRE_FPU; > - 
REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); > + > gen_set_rm(ctx, a->rm); > - gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); > + gen_helper_fnmadd_s(dest, cpu_env, src1, src2, src3); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -114,11 +145,15 @@ static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) > static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > gen_set_rm(ctx, a->rm); > - gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env, > - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_fadd_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -126,11 +161,15 @@ static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) > static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > gen_set_rm(ctx, a->rm); > - gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env, > - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_fsub_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -138,11 +177,15 @@ static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) > static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = 
get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > gen_set_rm(ctx, a->rm); > - gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env, > - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_fmul_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -150,11 +193,15 @@ static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) > static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > gen_set_rm(ctx, a->rm); > - gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env, > - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_fdiv_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -162,10 +209,14 @@ static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) > static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > gen_set_rm(ctx, a->rm); > - gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); > + gen_helper_fsqrt_s(dest, cpu_env, src1); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -173,22 +224,37 @@ static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) > static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > if (a->rs1 == a->rs2) { /* FMOV */ > - gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]); > + if (!ctx->ext_zfinx) { > + gen_check_nanbox_s(dest, src1); > + } else { > + tcg_gen_ext32s_i64(dest, src1); > + } > } else { /* FSGNJ */ > - 
TCGv_i64 rs1 = tcg_temp_new_i64(); > - TCGv_i64 rs2 = tcg_temp_new_i64(); > - > - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); > - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); > - > - /* This formulation retains the nanboxing of rs2. */ > - tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31); > - tcg_temp_free_i64(rs1); > - tcg_temp_free_i64(rs2); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > + > + if (!ctx->ext_zfinx) { > + TCGv_i64 rs1 = tcg_temp_new_i64(); > + TCGv_i64 rs2 = tcg_temp_new_i64(); > + gen_check_nanbox_s(rs1, src1); > + gen_check_nanbox_s(rs2, src2); > + > + /* This formulation retains the nanboxing of rs2 in normal 'F'. */ > + tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31); > + > + tcg_temp_free_i64(rs1); > + tcg_temp_free_i64(rs2); > + } else { > + tcg_gen_deposit_i64(dest, src2, src1, 0, 31); > + tcg_gen_ext32s_i64(dest, dest); > + } > } > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -198,16 +264,27 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) > TCGv_i64 rs1, rs2, mask; > > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > - rs1 = tcg_temp_new_i64(); > - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > + rs1 = tcg_temp_new_i64(); > + if (!ctx->ext_zfinx) { > + gen_check_nanbox_s(rs1, src1); > + } else { > + tcg_gen_mov_i64(rs1, src1); > + } > if (a->rs1 == a->rs2) { /* FNEG */ > - tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1)); > + tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(31, 1)); > } else { > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > rs2 = tcg_temp_new_i64(); > - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); > + if (!ctx->ext_zfinx) { > + gen_check_nanbox_s(rs2, src2); > + } else { > + tcg_gen_mov_i64(rs2, src2); > + } > > /* > * Replace bit 31 in rs1 with inverse in rs2. 
> @@ -215,13 +292,17 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) > */ > mask = tcg_constant_i64(~MAKE_64BIT_MASK(31, 1)); > tcg_gen_nor_i64(rs2, rs2, mask); > - tcg_gen_and_i64(rs1, mask, rs1); > - tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2); > + tcg_gen_and_i64(dest, mask, rs1); > + tcg_gen_or_i64(dest, dest, rs2); > > tcg_temp_free_i64(rs2); > } > + /* signed-extended intead of nanboxing for result if enable zfinx */ > + if (ctx->ext_zfinx) { > + tcg_gen_ext32s_i64(dest, dest); > + } > + gen_set_fpr_hs(ctx, a->rd, dest); > tcg_temp_free_i64(rs1); > - > mark_fs_dirty(ctx); > return true; > } > @@ -231,28 +312,45 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) > TCGv_i64 rs1, rs2; > > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > rs1 = tcg_temp_new_i64(); > - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); > + > + if (!ctx->ext_zfinx) { > + gen_check_nanbox_s(rs1, src1); > + } else { > + tcg_gen_mov_i64(rs1, src1); > + } > > if (a->rs1 == a->rs2) { /* FABS */ > - tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1)); > + tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(31, 1)); > } else { > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > rs2 = tcg_temp_new_i64(); > - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); > + > + if (!ctx->ext_zfinx) { > + gen_check_nanbox_s(rs2, src2); > + } else { > + tcg_gen_mov_i64(rs2, src2); > + } > > /* > * Xor bit 31 in rs1 with that in rs2. > * This formulation retains the nanboxing of rs1. 
> */ > - tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1)); > - tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2); > + tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1)); > + tcg_gen_xor_i64(dest, rs1, dest); > > tcg_temp_free_i64(rs2); > } > + /* signed-extended intead of nanboxing for result if enable zfinx */ > + if (ctx->ext_zfinx) { > + tcg_gen_ext32s_i64(dest, dest); > + } > tcg_temp_free_i64(rs1); > - > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -260,10 +358,14 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) > static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > - gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2]); > + gen_helper_fmin_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -271,10 +373,14 @@ static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) > static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > + > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > - gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], > - cpu_fpr[a->rs2]); > + gen_helper_fmax_s(dest, cpu_env, src1, src2); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -282,12 +388,13 @@ static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) > static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_w_s(dest, 
cpu_env, cpu_fpr[a->rs1]); > + gen_helper_fcvt_w_s(dest, cpu_env, src1); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -295,12 +402,13 @@ static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) > static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_wu_s(dest, cpu_env, cpu_fpr[a->rs1]); > + gen_helper_fcvt_wu_s(dest, cpu_env, src1); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -309,14 +417,14 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) > { > /* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */ > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > - > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > #if defined(TARGET_RISCV64) > - tcg_gen_ext32s_tl(dest, cpu_fpr[a->rs1]); > + tcg_gen_ext32s_tl(dest, src1); > #else > - tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]); > + tcg_gen_extrl_i64_i32(dest, src1); > #endif > > gen_set_gpr(ctx, a->rd, dest); > @@ -326,11 +434,13 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) > static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > - gen_helper_feq_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_feq_s(dest, cpu_env, src1, src2); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -338,11 +448,13 @@ static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) > static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = 
get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > - gen_helper_flt_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_flt_s(dest, cpu_env, src1, src2); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -350,11 +462,13 @@ static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) > static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); > > - gen_helper_fle_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); > + gen_helper_fle_s(dest, cpu_env, src1, src2); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -362,11 +476,12 @@ static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) > static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > - gen_helper_fclass_s(dest, cpu_fpr[a->rs1]); > + gen_helper_fclass_s(dest, cpu_env, src1); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -374,13 +489,14 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) > static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, src); > - > + gen_helper_fcvt_s_w(dest, cpu_env, src); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -388,13 +504,14 @@ static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) > static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a) > { > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = 
dest_fpr(ctx, a->rd); > TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, src); > - > + gen_helper_fcvt_s_wu(dest, cpu_env, src); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -403,13 +520,14 @@ static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a) > { > /* NOTE: This was FMV.S.X in an earlier version of the ISA spec! */ > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); > > - tcg_gen_extu_tl_i64(cpu_fpr[a->rd], src); > - gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]); > - > + tcg_gen_extu_tl_i64(dest, src); > + gen_nanbox_s(dest, dest); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -418,12 +536,13 @@ static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a) > { > REQUIRE_64BIT(ctx); > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_l_s(dest, cpu_env, cpu_fpr[a->rs1]); > + gen_helper_fcvt_l_s(dest, cpu_env, src1); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -432,12 +551,13 @@ static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a) > { > REQUIRE_64BIT(ctx); > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > TCGv dest = dest_gpr(ctx, a->rd); > + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_lu_s(dest, cpu_env, cpu_fpr[a->rs1]); > + gen_helper_fcvt_lu_s(dest, cpu_env, src1); > gen_set_gpr(ctx, a->rd, dest); > return true; > } > @@ -446,13 +566,14 @@ static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a) > { > REQUIRE_64BIT(ctx); > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > TCGv src = 
get_gpr(ctx, a->rs1, EXT_SIGN); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, src); > - > + gen_helper_fcvt_s_l(dest, cpu_env, src); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > @@ -461,13 +582,14 @@ static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a) > { > REQUIRE_64BIT(ctx); > REQUIRE_FPU; > - REQUIRE_EXT(ctx, RVF); > + REQUIRE_ZFINX_OR_F(ctx); > > + TCGv_i64 dest = dest_fpr(ctx, a->rd); > TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); > > gen_set_rm(ctx, a->rm); > - gen_helper_fcvt_s_lu(cpu_fpr[a->rd], cpu_env, src); > - > + gen_helper_fcvt_s_lu(dest, cpu_env, src); > + gen_set_fpr_hs(ctx, a->rd, dest); > mark_fs_dirty(ctx); > return true; > } > diff --git a/target/riscv/internals.h b/target/riscv/internals.h > index 065e8162a2..6237bb3115 100644 > --- a/target/riscv/internals.h > +++ b/target/riscv/internals.h > @@ -46,13 +46,23 @@ enum { > RISCV_FRM_ROD = 8, /* Round to Odd */ > }; > > -static inline uint64_t nanbox_s(float32 f) > +static inline uint64_t nanbox_s(CPURISCVState *env, float32 f) > { > - return f | MAKE_64BIT_MASK(32, 32); > + /* the value is sign-extended instead of NaN-boxing for zfinx */ > + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { > + return (int32_t)f; > + } else { > + return f | MAKE_64BIT_MASK(32, 32); > + } > } > > -static inline float32 check_nanbox_s(uint64_t f) > +static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f) > { > + /* Disable NaN-boxing check when enable zfinx */ > + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { > + return (uint32_t)f; > + } > + > uint64_t mask = MAKE_64BIT_MASK(32, 32); > > if (likely((f & mask) == mask)) { > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > index 8f01063618..e3a30c4748 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -105,10 +105,13 @@ typedef struct DisasContext { > target_ulong vstart; > bool vl_eq_vlmax; > uint8_t ntemp; > + uint8_t nftemp; > 
CPUState *cs; > TCGv zero; > /* Space for 3 operands plus 1 extra for address computation. */ > TCGv temp[4]; > + /* Space for 4 float point operands */ > + TCGv_i64 ftemp[4]; It might be worth keeping all 3 together with a comment on what/why they are used. > /* PointerMasking extension */ > bool pm_enabled; > TCGv pm_mask; > @@ -359,6 +362,86 @@ static void gen_set_gpr128(DisasContext *ctx, int reg_num, TCGv rl, TCGv rh) > } > } > > +static TCGv_i64 ftemp_new(DisasContext *ctx) > +{ > + assert(ctx->nftemp < ARRAY_SIZE(ctx->ftemp)); > + return ctx->ftemp[ctx->nftemp++] = tcg_temp_new_i64(); > +} > + > +static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num) > +{ > + if (!ctx->ext_zfinx) { > + return cpu_fpr[reg_num]; > + } > + > + if (reg_num == 0) { > + return tcg_constant_i64(0); > + } > + switch (get_xl(ctx)) { > + case MXL_RV32: > +#ifdef TARGET_RISCV32 > + { > + TCGv_i64 t = ftemp_new(ctx); > + tcg_gen_ext_i32_i64(t, cpu_gpr[reg_num]); > + return t; > + } > +#else > + /* fall through */ > + case MXL_RV64: > + return cpu_gpr[reg_num]; > +#endif > + default: > + g_assert_not_reached(); > + } > +} > + > +static TCGv_i64 dest_fpr(DisasContext *ctx, int reg_num) > +{ > + if (!ctx->ext_zfinx) { > + return cpu_fpr[reg_num]; > + } > + > + switch (get_xl(ctx)) { > + case MXL_RV32: > + return ftemp_new(ctx); > +#ifdef TARGET_RISCV64 > + case MXL_RV64: > + if (reg_num == 0) { > + return ftemp_new(ctx); > + } else { > + return cpu_gpr[reg_num]; > + } > +#endif You don't need the define here as you can only get to MXL_RV64 on a 64-bit target. 
The same applies to the else clauses of the other #ifdefs. > + default: > + g_assert_not_reached(); > + } > +} > + > +/* assume t is nanboxing (for normal) or sign-extended (for zfinx) */ > +static void gen_set_fpr_hs(DisasContext *ctx, int reg_num, TCGv_i64 t) > +{ > + if (!ctx->ext_zfinx) { > + tcg_gen_mov_i64(cpu_fpr[reg_num], t); > + return; > + } > + if (reg_num != 0) { > + switch (get_xl(ctx)) { > + case MXL_RV32: > +#ifdef TARGET_RISCV32 > + tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t); > + break; > +#else > + /* fall through */ > + case MXL_RV64: > + tcg_gen_mov_i64(cpu_gpr[reg_num], t); > + break; > +#endif > + default: > + g_assert_not_reached(); > + } > + } > +} > + > static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) > { > target_ulong next_pc; > @@ -921,6 +1004,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) > ctx->cs = cs; > ctx->ntemp = 0; > memset(ctx->temp, 0, sizeof(ctx->temp)); > + ctx->nftemp = 0; > + memset(ctx->ftemp, 0, sizeof(ctx->ftemp)); > ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED); > int priv = tb_flags & TB_FLAGS_PRIV_MMU_MASK; > ctx->pm_mask = pm_mask[priv]; > @@ -955,6 +1040,11 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) > ctx->temp[i] = NULL; > } > ctx->ntemp = 0; > + for (int i = ctx->nftemp - 1; i >= 0; --i) { Don't declare the variable in the for loop. Alistair > + tcg_temp_free_i64(ctx->ftemp[i]); > + ctx->ftemp[i] = NULL; > + } > + ctx->nftemp = 0; > > if (ctx->base.is_jmp == DISAS_NEXT) { > target_ulong page_start; > -- > 2.17.1 > >
在 2022/1/28 下午2:09, Alistair Francis 写道: > On Thu, Jan 13, 2022 at 11:52 AM Weiwei Li <liweiwei@iscas.ac.cn> wrote: >> - update extension check REQUIRE_ZFINX_OR_F >> - update single float point register read/write >> - disable nanbox_s check >> >> Co-authored-by: ardxwe <ardxwe@gmail.com> >> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/riscv/fpu_helper.c | 89 +++---- >> target/riscv/helper.h | 2 +- >> target/riscv/insn_trans/trans_rvf.c.inc | 314 ++++++++++++++++-------- >> target/riscv/internals.h | 16 +- >> target/riscv/translate.c | 90 +++++++ >> 5 files changed, 367 insertions(+), 144 deletions(-) >> >> diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c >> index 4a5982d594..63ca703459 100644 >> --- a/target/riscv/fpu_helper.c >> +++ b/target/riscv/fpu_helper.c >> @@ -98,10 +98,11 @@ static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2, >> static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2, >> uint64_t rs3, int flags) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - float32 frs3 = check_nanbox_s(rs3); >> - return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + float32 frs3 = check_nanbox_s(env, rs3); >> + return nanbox_s(env, float32_muladd(frs1, frs2, frs3, flags, >> + &env->fp_status)); >> } >> >> uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, >> @@ -183,124 +184,124 @@ uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2, >> >> uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); >> + 
float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, float32_add(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fsub_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, float32_sub(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fmul_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, float32_mul(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, float32_div(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? 
>> float32_minnum(frs1, frs2, &env->fp_status) : >> float32_minimum_number(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? >> float32_maxnum(frs1, frs2, &env->fp_status) : >> float32_maximum_number(frs1, frs2, &env->fp_status)); >> } >> >> uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - return nanbox_s(float32_sqrt(frs1, &env->fp_status)); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + return nanbox_s(env, float32_sqrt(frs1, &env->fp_status)); >> } >> >> target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> return float32_le(frs1, frs2, &env->fp_status); >> } >> >> target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> return float32_lt(frs1, frs2, &env->fp_status); >> } >> >> target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> - float32 frs2 = check_nanbox_s(rs2); >> + float32 frs1 = check_nanbox_s(env, rs1); >> + float32 frs2 = check_nanbox_s(env, rs2); >> return float32_eq_quiet(frs1, frs2, &env->fp_status); >> } >> >> target_ulong helper_fcvt_w_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> 
return float32_to_int32(frs1, &env->fp_status); >> } >> >> target_ulong helper_fcvt_wu_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return (int32_t)float32_to_uint32(frs1, &env->fp_status); >> } >> >> target_ulong helper_fcvt_l_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return float32_to_int64(frs1, &env->fp_status); >> } >> >> target_ulong helper_fcvt_lu_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return float32_to_uint64(frs1, &env->fp_status); >> } >> >> uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) >> { >> - return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); >> + return nanbox_s(env, int32_to_float32((int32_t)rs1, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) >> { >> - return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); >> + return nanbox_s(env, uint32_to_float32((uint32_t)rs1, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_s_l(CPURISCVState *env, target_ulong rs1) >> { >> - return nanbox_s(int64_to_float32(rs1, &env->fp_status)); >> + return nanbox_s(env, int64_to_float32(rs1, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_s_lu(CPURISCVState *env, target_ulong rs1) >> { >> - return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); >> + return nanbox_s(env, uint64_to_float32(rs1, &env->fp_status)); >> } >> >> -target_ulong helper_fclass_s(uint64_t rs1) >> +target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return fclass_s(frs1); >> } >> >> @@ -340,12 +341,12 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) >> >> uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) 
>> { >> - return nanbox_s(float64_to_float32(rs1, &env->fp_status)); >> + return nanbox_s(env, float64_to_float32(rs1, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return float32_to_float64(frs1, &env->fp_status); >> } >> >> @@ -539,14 +540,14 @@ uint64_t helper_fcvt_h_lu(CPURISCVState *env, target_ulong rs1) >> >> uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1) >> { >> - float32 frs1 = check_nanbox_s(rs1); >> + float32 frs1 = check_nanbox_s(env, rs1); >> return nanbox_h(float32_to_float16(frs1, true, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1) >> { >> float16 frs1 = check_nanbox_h(rs1); >> - return nanbox_s(float16_to_float32(frs1, true, &env->fp_status)); >> + return nanbox_s(env, float16_to_float32(frs1, true, &env->fp_status)); >> } >> >> uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1) >> diff --git a/target/riscv/helper.h b/target/riscv/helper.h >> index 6cf6d6ce98..33f928c76b 100644 >> --- a/target/riscv/helper.h >> +++ b/target/riscv/helper.h >> @@ -38,7 +38,7 @@ DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) >> DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) >> DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) >> DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) >> -DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) >> +DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64) >> >> /* Floating Point - Double Precision */ >> DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) >> diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc >> index b5459249c4..50c7faf38f 100644 >> --- a/target/riscv/insn_trans/trans_rvf.c.inc >> +++ b/target/riscv/insn_trans/trans_rvf.c.inc >> @@ -20,7 +20,14 @@ >> >> #define REQUIRE_FPU do {\ >> if (ctx->mstatus_fs == 
0) \ >> - return false; \ >> + if (!ctx->ext_zfinx) \ >> + return false; \ >> +} while (0) >> + >> +#define REQUIRE_ZFINX_OR_F(ctx) do {\ >> + if (!ctx->ext_zfinx) { \ >> + REQUIRE_EXT(ctx, RVF); \ >> + } \ >> } while (0) >> >> static bool trans_flw(DisasContext *ctx, arg_flw *a) >> @@ -70,10 +77,16 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a) >> static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); >> + >> gen_set_rm(ctx, a->rm); >> - gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); >> + gen_helper_fmadd_s(dest, cpu_env, src1, src2, src3); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -81,10 +94,16 @@ static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) >> static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); >> + >> gen_set_rm(ctx, a->rm); >> - gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); >> + gen_helper_fmsub_s(dest, cpu_env, src1, src2, src3); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -92,10 +111,16 @@ static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) >> static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 
= get_fpr_hs(ctx, a->rs2); >> + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); >> + >> gen_set_rm(ctx, a->rm); >> - gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); >> + gen_helper_fnmsub_s(dest, cpu_env, src1, src2, src3); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -103,10 +128,16 @@ static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) >> static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); >> + >> gen_set_rm(ctx, a->rm); >> - gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); >> + gen_helper_fnmadd_s(dest, cpu_env, src1, src2, src3); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -114,11 +145,15 @@ static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) >> static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env, >> - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_fadd_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -126,11 +161,15 @@ static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) >> static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + 
TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env, >> - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_fsub_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -138,11 +177,15 @@ static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) >> static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env, >> - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_fmul_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -150,11 +193,15 @@ static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) >> static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env, >> - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_fdiv_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -162,10 +209,14 @@ static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) >> static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); >> + gen_helper_fsqrt_s(dest, cpu_env, src1); >> + 
gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -173,22 +224,37 @@ static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) >> static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> if (a->rs1 == a->rs2) { /* FMOV */ >> - gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]); >> + if (!ctx->ext_zfinx) { >> + gen_check_nanbox_s(dest, src1); >> + } else { >> + tcg_gen_ext32s_i64(dest, src1); >> + } >> } else { /* FSGNJ */ >> - TCGv_i64 rs1 = tcg_temp_new_i64(); >> - TCGv_i64 rs2 = tcg_temp_new_i64(); >> - >> - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); >> - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); >> - >> - /* This formulation retains the nanboxing of rs2. */ >> - tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31); >> - tcg_temp_free_i64(rs1); >> - tcg_temp_free_i64(rs2); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> + >> + if (!ctx->ext_zfinx) { >> + TCGv_i64 rs1 = tcg_temp_new_i64(); >> + TCGv_i64 rs2 = tcg_temp_new_i64(); >> + gen_check_nanbox_s(rs1, src1); >> + gen_check_nanbox_s(rs2, src2); >> + >> + /* This formulation retains the nanboxing of rs2 in normal 'F'. 
*/ >> + tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31); >> + >> + tcg_temp_free_i64(rs1); >> + tcg_temp_free_i64(rs2); >> + } else { >> + tcg_gen_deposit_i64(dest, src2, src1, 0, 31); >> + tcg_gen_ext32s_i64(dest, dest); >> + } >> } >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -198,16 +264,27 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) >> TCGv_i64 rs1, rs2, mask; >> >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> - rs1 = tcg_temp_new_i64(); >> - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> + rs1 = tcg_temp_new_i64(); >> + if (!ctx->ext_zfinx) { >> + gen_check_nanbox_s(rs1, src1); >> + } else { >> + tcg_gen_mov_i64(rs1, src1); >> + } >> if (a->rs1 == a->rs2) { /* FNEG */ >> - tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1)); >> + tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(31, 1)); >> } else { >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> rs2 = tcg_temp_new_i64(); >> - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); >> + if (!ctx->ext_zfinx) { >> + gen_check_nanbox_s(rs2, src2); >> + } else { >> + tcg_gen_mov_i64(rs2, src2); >> + } >> >> /* >> * Replace bit 31 in rs1 with inverse in rs2. 
>> @@ -215,13 +292,17 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) >> */ >> mask = tcg_constant_i64(~MAKE_64BIT_MASK(31, 1)); >> tcg_gen_nor_i64(rs2, rs2, mask); >> - tcg_gen_and_i64(rs1, mask, rs1); >> - tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2); >> + tcg_gen_and_i64(dest, mask, rs1); >> + tcg_gen_or_i64(dest, dest, rs2); >> >> tcg_temp_free_i64(rs2); >> } >> + /* signed-extended intead of nanboxing for result if enable zfinx */ >> + if (ctx->ext_zfinx) { >> + tcg_gen_ext32s_i64(dest, dest); >> + } >> + gen_set_fpr_hs(ctx, a->rd, dest); >> tcg_temp_free_i64(rs1); >> - >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -231,28 +312,45 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) >> TCGv_i64 rs1, rs2; >> >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> rs1 = tcg_temp_new_i64(); >> - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); >> + >> + if (!ctx->ext_zfinx) { >> + gen_check_nanbox_s(rs1, src1); >> + } else { >> + tcg_gen_mov_i64(rs1, src1); >> + } >> >> if (a->rs1 == a->rs2) { /* FABS */ >> - tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1)); >> + tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(31, 1)); >> } else { >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> rs2 = tcg_temp_new_i64(); >> - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); >> + >> + if (!ctx->ext_zfinx) { >> + gen_check_nanbox_s(rs2, src2); >> + } else { >> + tcg_gen_mov_i64(rs2, src2); >> + } >> >> /* >> * Xor bit 31 in rs1 with that in rs2. >> * This formulation retains the nanboxing of rs1. 
>> */ >> - tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1)); >> - tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2); >> + tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1)); >> + tcg_gen_xor_i64(dest, rs1, dest); >> >> tcg_temp_free_i64(rs2); >> } >> + /* signed-extended intead of nanboxing for result if enable zfinx */ >> + if (ctx->ext_zfinx) { >> + tcg_gen_ext32s_i64(dest, dest); >> + } >> tcg_temp_free_i64(rs1); >> - >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -260,10 +358,14 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) >> static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> - gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2]); >> + gen_helper_fmin_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -271,10 +373,14 @@ static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) >> static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> + >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> - gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], >> - cpu_fpr[a->rs2]); >> + gen_helper_fmax_s(dest, cpu_env, src1, src2); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -282,12 +388,13 @@ static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) >> static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, 
a->rs1); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_w_s(dest, cpu_env, cpu_fpr[a->rs1]); >> + gen_helper_fcvt_w_s(dest, cpu_env, src1); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -295,12 +402,13 @@ static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) >> static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_wu_s(dest, cpu_env, cpu_fpr[a->rs1]); >> + gen_helper_fcvt_wu_s(dest, cpu_env, src1); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -309,14 +417,14 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) >> { >> /* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */ >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> - >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> #if defined(TARGET_RISCV64) >> - tcg_gen_ext32s_tl(dest, cpu_fpr[a->rs1]); >> + tcg_gen_ext32s_tl(dest, src1); >> #else >> - tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]); >> + tcg_gen_extrl_i64_i32(dest, src1); >> #endif >> >> gen_set_gpr(ctx, a->rd, dest); >> @@ -326,11 +434,13 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) >> static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> - gen_helper_feq_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_feq_s(dest, cpu_env, src1, src2); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -338,11 +448,13 @@ static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) >> static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) >> { >> REQUIRE_FPU; 
>> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> - gen_helper_flt_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_flt_s(dest, cpu_env, src1, src2); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -350,11 +462,13 @@ static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) >> static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); >> >> - gen_helper_fle_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); >> + gen_helper_fle_s(dest, cpu_env, src1, src2); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -362,11 +476,12 @@ static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) >> static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> - gen_helper_fclass_s(dest, cpu_fpr[a->rs1]); >> + gen_helper_fclass_s(dest, cpu_env, src1); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -374,13 +489,14 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) >> static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, src); >> - >> + gen_helper_fcvt_s_w(dest, cpu_env, src); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -388,13 +504,14 @@ static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w 
*a) >> static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a) >> { >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, src); >> - >> + gen_helper_fcvt_s_wu(dest, cpu_env, src); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -403,13 +520,14 @@ static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a) >> { >> /* NOTE: This was FMV.S.X in an earlier version of the ISA spec! */ >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); >> >> - tcg_gen_extu_tl_i64(cpu_fpr[a->rd], src); >> - gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]); >> - >> + tcg_gen_extu_tl_i64(dest, src); >> + gen_nanbox_s(dest, dest); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -418,12 +536,13 @@ static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a) >> { >> REQUIRE_64BIT(ctx); >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_l_s(dest, cpu_env, cpu_fpr[a->rs1]); >> + gen_helper_fcvt_l_s(dest, cpu_env, src1); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -432,12 +551,13 @@ static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a) >> { >> REQUIRE_64BIT(ctx); >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> TCGv dest = dest_gpr(ctx, a->rd); >> + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_lu_s(dest, cpu_env, cpu_fpr[a->rs1]); >> + gen_helper_fcvt_lu_s(dest, cpu_env, src1); >> gen_set_gpr(ctx, a->rd, dest); >> return true; >> } >> @@ -446,13 
+566,14 @@ static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a) >> { >> REQUIRE_64BIT(ctx); >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, src); >> - >> + gen_helper_fcvt_s_l(dest, cpu_env, src); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> @@ -461,13 +582,14 @@ static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a) >> { >> REQUIRE_64BIT(ctx); >> REQUIRE_FPU; >> - REQUIRE_EXT(ctx, RVF); >> + REQUIRE_ZFINX_OR_F(ctx); >> >> + TCGv_i64 dest = dest_fpr(ctx, a->rd); >> TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); >> >> gen_set_rm(ctx, a->rm); >> - gen_helper_fcvt_s_lu(cpu_fpr[a->rd], cpu_env, src); >> - >> + gen_helper_fcvt_s_lu(dest, cpu_env, src); >> + gen_set_fpr_hs(ctx, a->rd, dest); >> mark_fs_dirty(ctx); >> return true; >> } >> diff --git a/target/riscv/internals.h b/target/riscv/internals.h >> index 065e8162a2..6237bb3115 100644 >> --- a/target/riscv/internals.h >> +++ b/target/riscv/internals.h >> @@ -46,13 +46,23 @@ enum { >> RISCV_FRM_ROD = 8, /* Round to Odd */ >> }; >> >> -static inline uint64_t nanbox_s(float32 f) >> +static inline uint64_t nanbox_s(CPURISCVState *env, float32 f) >> { >> - return f | MAKE_64BIT_MASK(32, 32); >> + /* the value is sign-extended instead of NaN-boxing for zfinx */ >> + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { >> + return (int32_t)f; >> + } else { >> + return f | MAKE_64BIT_MASK(32, 32); >> + } >> } >> >> -static inline float32 check_nanbox_s(uint64_t f) >> +static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f) >> { >> + /* Disable NaN-boxing check when enable zfinx */ >> + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { >> + return (uint32_t)f; >> + } >> + >> uint64_t mask = MAKE_64BIT_MASK(32, 32); >> >> if (likely((f & mask) == mask)) { >> diff --git 
a/target/riscv/translate.c b/target/riscv/translate.c >> index 8f01063618..e3a30c4748 100644 >> --- a/target/riscv/translate.c >> +++ b/target/riscv/translate.c >> @@ -105,10 +105,13 @@ typedef struct DisasContext { >> target_ulong vstart; >> bool vl_eq_vlmax; >> uint8_t ntemp; >> + uint8_t nftemp; >> CPUState *cs; >> TCGv zero; >> /* Space for 3 operands plus 1 extra for address computation. */ >> TCGv temp[4]; >> + /* Space for 4 float point operands */ >> + TCGv_i64 ftemp[4]; > It might be worth keeping all 3 together with a comment on what/why > they are used. OK. I'll put them together. >> /* PointerMasking extension */ >> bool pm_enabled; >> TCGv pm_mask; >> @@ -359,6 +362,86 @@ static void gen_set_gpr128(DisasContext *ctx, int reg_num, TCGv rl, TCGv rh) >> } >> } >> >> +static TCGv_i64 ftemp_new(DisasContext *ctx) >> +{ >> + assert(ctx->nftemp < ARRAY_SIZE(ctx->ftemp)); >> + return ctx->ftemp[ctx->nftemp++] = tcg_temp_new_i64(); >> +} >> + >> +static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num) >> +{ >> + if (!ctx->ext_zfinx) { >> + return cpu_fpr[reg_num]; >> + } >> + >> + if (reg_num == 0) { >> + return tcg_constant_i64(0); >> + } >> + switch (get_xl(ctx)) { >> + case MXL_RV32: >> +#ifdef TARGET_RISCV32 >> + { >> + TCGv_i64 t = ftemp_new(ctx); >> + tcg_gen_ext_i32_i64(t, cpu_gpr[reg_num]); >> + return t; >> + } >> +#else >> + /* fall through */ >> + case MXL_RV64: >> + return cpu_gpr[reg_num]; >> +#endif >> + default: >> + g_assert_not_reached(); >> + } >> +} >> + >> +static TCGv_i64 dest_fpr(DisasContext *ctx, int reg_num) >> +{ >> + if (!ctx->ext_zfinx) { >> + return cpu_fpr[reg_num]; >> + } >> + >> + switch (get_xl(ctx)) { >> + case MXL_RV32: >> + return ftemp_new(ctx); >> +#ifdef TARGET_RISCV64 >> + case MXL_RV64: >> + if (reg_num == 0) { >> + return ftemp_new(ctx); >> + } else { >> + return cpu_gpr[reg_num]; >> + } >> +#endif > You don't need the define here as you can only get to MXL_RV64 on a > 64-bit target. 
> > Same with the else clauses on the other #ifdef's I use the #ifdef here (and the #else branches in the other places) so that cpu_gpr can be reused directly: its type is TCGv, while what we need here is TCGv_i64. >> + default: >> + g_assert_not_reached(); >> + } >> +} >> + >> +/* assume t is nanboxing (for normal) or sign-extended (for zfinx) */ >> +static void gen_set_fpr_hs(DisasContext *ctx, int reg_num, TCGv_i64 t) >> +{ >> + if (!ctx->ext_zfinx) { >> + tcg_gen_mov_i64(cpu_fpr[reg_num], t); >> + return; >> + } >> + if (reg_num != 0) { >> + switch (get_xl(ctx)) { >> + case MXL_RV32: >> +#ifdef TARGET_RISCV32 >> + tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t); >> + break; >> +#else >> + /* fall through */ >> + case MXL_RV64: >> + tcg_gen_mov_i64(cpu_gpr[reg_num], t); >> + break; >> +#endif >> + default: >> + g_assert_not_reached(); >> + } >> + } >> +} >> + >> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >> { >> target_ulong next_pc; >> @@ -921,6 +1004,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >> ctx->cs = cs; >> ctx->ntemp = 0; >> memset(ctx->temp, 0, sizeof(ctx->temp)); >> + ctx->nftemp = 0; >> + memset(ctx->ftemp, 0, sizeof(ctx->ftemp)); >> ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED); >> int priv = tb_flags & TB_FLAGS_PRIV_MMU_MASK; >> ctx->pm_mask = pm_mask[priv]; >> @@ -955,6 +1040,11 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) >> ctx->temp[i] = NULL; >> } >> ctx->ntemp = 0; >> + for (int i = ctx->nftemp - 1; i >= 0; --i) { > Don't declare the variable in the for loop > > Alistair OK. I'll fix this. Regards, Weiwei Li >> + tcg_temp_free_i64(ctx->ftemp[i]); >> + ctx->ftemp[i] = NULL; >> + } >> + ctx->nftemp = 0; >> >> if (ctx->base.is_jmp == DISAS_NEXT) { >> target_ulong page_start; >> -- >> 2.17.1 >> >>
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 4a5982d594..63ca703459 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -98,10 +98,11 @@ static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2, static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2, uint64_t rs3, int flags) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - float32 frs3 = check_nanbox_s(rs3); - return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + float32 frs3 = check_nanbox_s(env, rs3); + return nanbox_s(env, float32_muladd(frs1, frs2, frs3, flags, + &env->fp_status)); } uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -183,124 +184,124 @@ uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_add(frs1, frs2, &env->fp_status)); } uint64_t helper_fsub_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_sub(frs1, frs2, &env->fp_status)); } uint64_t helper_fmul_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, 
float32_mul(frs1, frs2, &env->fp_status)); } uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_div(frs1, frs2, &env->fp_status)); } uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? float32_minnum(frs1, frs2, &env->fp_status) : float32_minimum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? 
float32_maxnum(frs1, frs2, &env->fp_status) : float32_maximum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); - return nanbox_s(float32_sqrt(frs1, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + return nanbox_s(env, float32_sqrt(frs1, &env->fp_status)); } target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_le(frs1, frs2, &env->fp_status); } target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_lt(frs1, frs2, &env->fp_status); } target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_eq_quiet(frs1, frs2, &env->fp_status); } target_ulong helper_fcvt_w_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_int32(frs1, &env->fp_status); } target_ulong helper_fcvt_wu_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return (int32_t)float32_to_uint32(frs1, &env->fp_status); } target_ulong helper_fcvt_l_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_int64(frs1, &env->fp_status); } target_ulong helper_fcvt_lu_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return 
float32_to_uint64(frs1, &env->fp_status); } uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); + return nanbox_s(env, int32_to_float32((int32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); + return nanbox_s(env, uint32_to_float32((uint32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_s_l(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(int64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, int64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_s_lu(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, uint64_to_float32(rs1, &env->fp_status)); } -target_ulong helper_fclass_s(uint64_t rs1) +target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return fclass_s(frs1); } @@ -340,12 +341,12 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) { - return nanbox_s(float64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, float64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_float64(frs1, &env->fp_status); } @@ -539,14 +540,14 @@ uint64_t helper_fcvt_h_lu(CPURISCVState *env, target_ulong rs1) uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return nanbox_h(float32_to_float16(frs1, true, &env->fp_status)); } uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1) { float16 frs1 = check_nanbox_h(rs1); - return nanbox_s(float16_to_float32(frs1, true, 
&env->fp_status)); + return nanbox_s(env, float16_to_float32(frs1, true, &env->fp_status)); } uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 6cf6d6ce98..33f928c76b 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -38,7 +38,7 @@ DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) -DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) +DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64) /* Floating Point - Double Precision */ DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc index b5459249c4..50c7faf38f 100644 --- a/target/riscv/insn_trans/trans_rvf.c.inc +++ b/target/riscv/insn_trans/trans_rvf.c.inc @@ -20,7 +20,14 @@ #define REQUIRE_FPU do {\ if (ctx->mstatus_fs == 0) \ - return false; \ + if (!ctx->ext_zfinx) \ + return false; \ +} while (0) + +#define REQUIRE_ZFINX_OR_F(ctx) do {\ + if (!ctx->ext_zfinx) { \ + REQUIRE_EXT(ctx, RVF); \ + } \ } while (0) static bool trans_flw(DisasContext *ctx, arg_flw *a) @@ -70,10 +77,16 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a) static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmadd_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -81,10 +94,16 @@ static bool 
trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmsub_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -92,10 +111,16 @@ static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmsub_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -103,10 +128,16 @@ static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmadd_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -114,11 +145,15 @@ static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) static bool trans_fadd_s(DisasContext 
*ctx, arg_fadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fadd_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -126,11 +161,15 @@ static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fsub_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -138,11 +177,15 @@ static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fmul_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -150,11 +193,15 @@ static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], 
cpu_fpr[a->rs2]); + gen_helper_fdiv_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -162,10 +209,14 @@ static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + gen_helper_fsqrt_s(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -173,22 +224,37 @@ static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); if (a->rs1 == a->rs2) { /* FMOV */ - gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]); + if (!ctx->ext_zfinx) { + gen_check_nanbox_s(dest, src1); + } else { + tcg_gen_ext32s_i64(dest, src1); + } } else { /* FSGNJ */ - TCGv_i64 rs1 = tcg_temp_new_i64(); - TCGv_i64 rs2 = tcg_temp_new_i64(); - - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); - - /* This formulation retains the nanboxing of rs2. */ - tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31); - tcg_temp_free_i64(rs1); - tcg_temp_free_i64(rs2); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + + if (!ctx->ext_zfinx) { + TCGv_i64 rs1 = tcg_temp_new_i64(); + TCGv_i64 rs2 = tcg_temp_new_i64(); + gen_check_nanbox_s(rs1, src1); + gen_check_nanbox_s(rs2, src2); + + /* This formulation retains the nanboxing of rs2 in normal 'F'. 
*/ + tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31); + + tcg_temp_free_i64(rs1); + tcg_temp_free_i64(rs2); + } else { + tcg_gen_deposit_i64(dest, src2, src1, 0, 31); + tcg_gen_ext32s_i64(dest, dest); + } } + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -198,16 +264,27 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) TCGv_i64 rs1, rs2, mask; REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); - rs1 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + rs1 = tcg_temp_new_i64(); + if (!ctx->ext_zfinx) { + gen_check_nanbox_s(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FNEG */ - tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1)); + tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(31, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); + if (!ctx->ext_zfinx) { + gen_check_nanbox_s(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Replace bit 31 in rs1 with inverse in rs2. 
@@ -215,13 +292,17 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) */ mask = tcg_constant_i64(~MAKE_64BIT_MASK(31, 1)); tcg_gen_nor_i64(rs2, rs2, mask); - tcg_gen_and_i64(rs1, mask, rs1); - tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_and_i64(dest, mask, rs1); + tcg_gen_or_i64(dest, dest, rs2); tcg_temp_free_i64(rs2); } + /* sign-extend the result instead of NaN-boxing it when zfinx is enabled */ + if (ctx->ext_zfinx) { + tcg_gen_ext32s_i64(dest, dest); + } + gen_set_fpr_hs(ctx, a->rd, dest); tcg_temp_free_i64(rs1); - mark_fs_dirty(ctx); return true; } @@ -231,28 +312,45 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) TCGv_i64 rs1, rs2; REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); rs1 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); + + if (!ctx->ext_zfinx) { + gen_check_nanbox_s(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FABS */ - tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1)); + tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(31, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); + + if (!ctx->ext_zfinx) { + gen_check_nanbox_s(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Xor bit 31 in rs1 with that in rs2. * This formulation retains the nanboxing of rs1.
*/ - tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1)); - tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1)); + tcg_gen_xor_i64(dest, rs1, dest); tcg_temp_free_i64(rs2); } + /* sign-extend the result instead of NaN-boxing it when zfinx is enabled */ + if (ctx->ext_zfinx) { + tcg_gen_ext32s_i64(dest, dest); + } tcg_temp_free_i64(rs1); - + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -260,10 +358,14 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + gen_helper_fmin_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -271,10 +373,14 @@ static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + gen_helper_fmax_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -282,12 +388,13 @@ static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_w_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_w_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -295,12
+402,13 @@ static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_wu_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_wu_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -309,14 +417,14 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) { /* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */ REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); - + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); #if defined(TARGET_RISCV64) - tcg_gen_ext32s_tl(dest, cpu_fpr[a->rs1]); + tcg_gen_ext32s_tl(dest, src1); #else - tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]); + tcg_gen_extrl_i64_i32(dest, src1); #endif gen_set_gpr(ctx, a->rd, dest); @@ -326,11 +434,13 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_feq_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_feq_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -338,11 +448,13 @@ static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_flt_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_flt_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -350,11 +462,13 @@ static bool 
trans_flt_s(DisasContext *ctx, arg_flt_s *a) static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fle_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fle_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -362,11 +476,12 @@ static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); - gen_helper_fclass_s(dest, cpu_fpr[a->rs1]); + gen_helper_fclass_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -374,13 +489,14 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_w(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -388,13 +504,14 @@ static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_wu(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -403,13 +520,14 @@ static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a) { /* NOTE: This was FMV.S.X in an earlier version of the 
ISA spec! */ REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); - tcg_gen_extu_tl_i64(cpu_fpr[a->rd], src); - gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]); - + tcg_gen_extu_tl_i64(dest, src); + gen_nanbox_s(dest, dest); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -418,12 +536,13 @@ static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_l_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_l_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -432,12 +551,13 @@ static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_lu_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_lu_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -446,13 +566,14 @@ static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_l(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -461,13 +582,14 @@ static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_lu(cpu_fpr[a->rd], 
cpu_env, src); - + gen_helper_fcvt_s_lu(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 065e8162a2..6237bb3115 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -46,13 +46,23 @@ enum { RISCV_FRM_ROD = 8, /* Round to Odd */ }; -static inline uint64_t nanbox_s(float32 f) +static inline uint64_t nanbox_s(CPURISCVState *env, float32 f) { - return f | MAKE_64BIT_MASK(32, 32); + /* the value is sign-extended instead of NaN-boxing for zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (int32_t)f; + } else { + return f | MAKE_64BIT_MASK(32, 32); + } } -static inline float32 check_nanbox_s(uint64_t f) +static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f) { + /* Disable NaN-boxing check when enable zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (uint32_t)f; + } + uint64_t mask = MAKE_64BIT_MASK(32, 32); if (likely((f & mask) == mask)) { diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 8f01063618..e3a30c4748 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -105,10 +105,13 @@ typedef struct DisasContext { target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; + uint8_t nftemp; CPUState *cs; TCGv zero; /* Space for 3 operands plus 1 extra for address computation. 
*/ TCGv temp[4]; + /* Space for 4 floating-point operands */ + TCGv_i64 ftemp[4]; /* PointerMasking extension */ bool pm_enabled; TCGv pm_mask; @@ -359,6 +362,86 @@ static void gen_set_gpr128(DisasContext *ctx, int reg_num, TCGv rl, TCGv rh) } } +static TCGv_i64 ftemp_new(DisasContext *ctx) +{ + assert(ctx->nftemp < ARRAY_SIZE(ctx->ftemp)); + return ctx->ftemp[ctx->nftemp++] = tcg_temp_new_i64(); +} + +static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num) +{ + if (!ctx->ext_zfinx) { + return cpu_fpr[reg_num]; + } + + if (reg_num == 0) { + return tcg_constant_i64(0); + } + switch (get_xl(ctx)) { + case MXL_RV32: +#ifdef TARGET_RISCV32 + { + TCGv_i64 t = ftemp_new(ctx); + tcg_gen_ext_i32_i64(t, cpu_gpr[reg_num]); + return t; + } +#else + /* fall through */ + case MXL_RV64: + return cpu_gpr[reg_num]; +#endif + default: + g_assert_not_reached(); + } +} + +static TCGv_i64 dest_fpr(DisasContext *ctx, int reg_num) +{ + if (!ctx->ext_zfinx) { + return cpu_fpr[reg_num]; + } + + switch (get_xl(ctx)) { + case MXL_RV32: + return ftemp_new(ctx); +#ifdef TARGET_RISCV64 + case MXL_RV64: + if (reg_num == 0) { + return ftemp_new(ctx); + } else { + return cpu_gpr[reg_num]; + } +#endif + default: + g_assert_not_reached(); + } +} + +/* assume t is NaN-boxed (for normal F) or sign-extended (for zfinx) */ +static void gen_set_fpr_hs(DisasContext *ctx, int reg_num, TCGv_i64 t) +{ + if (!ctx->ext_zfinx) { + tcg_gen_mov_i64(cpu_fpr[reg_num], t); + return; + } + if (reg_num != 0) { + switch (get_xl(ctx)) { + case MXL_RV32: +#ifdef TARGET_RISCV32 + tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t); + break; +#else + /* fall through */ + case MXL_RV64: + tcg_gen_mov_i64(cpu_gpr[reg_num], t); + break; +#endif + default: + g_assert_not_reached(); + } + } +} + static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) { target_ulong next_pc; @@ -921,6 +1004,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->cs = cs; ctx->ntemp = 0; memset(ctx->temp, 0,
sizeof(ctx->temp)); + ctx->nftemp = 0; + memset(ctx->ftemp, 0, sizeof(ctx->ftemp)); ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED); int priv = tb_flags & TB_FLAGS_PRIV_MMU_MASK; ctx->pm_mask = pm_mask[priv]; @@ -955,6 +1040,11 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) ctx->temp[i] = NULL; } ctx->ntemp = 0; + for (int i = ctx->nftemp - 1; i >= 0; --i) { + tcg_temp_free_i64(ctx->ftemp[i]); + ctx->ftemp[i] = NULL; + } + ctx->nftemp = 0; if (ctx->base.is_jmp == DISAS_NEXT) { target_ulong page_start;