Message ID | 20230404020653.18911-5-liweiwei@iscas.ac.cn (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/riscv: Fix pointer mask related support | expand |
On 2023/4/4 10:06, Weiwei Li wrote: > Add a base pc_save for PC-relative translation(CF_PCREL). > Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. > We can get pc-relative address from following formula: > real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save. > Use gen_get_target_pc to compute target address of auipc and successor > address of jalr and jal. > > The existence of CF_PCREL can improve performance with the guest > kernel's address space randomization. Each guest process maps libc.so > (et al) at a different virtual address, and this allows those > translations to be shared. > > Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> > Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/riscv/cpu.c | 29 +++++++++------ > target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- Miss the process for trans_ebreak. I want to construct the PCREL feature on the processing of ctx pc related fields, which is the reason why we need do specially process. For example, static bool trans_auipc(DisasContext *ctx, arg_auipc *a) { - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); + if (tb_cflags(ctx->cflags) & CF_PCREL) { + target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first + a->imm; + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); + } else { + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); + } return true; } +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, target_ulong rel) +{ + TCGv dest = dest_gpr(ctx, reg_num); + tcg_gen_addi_tl(dest, t, rel); + gen_set_gpr(ctx, reg_num, dest); +} + But if it is too difficult to reuse the current implementation, your implementation is also acceptable to me. Zhiwei > target/riscv/translate.c | 48 ++++++++++++++++++++----- > 3 files changed, 70 insertions(+), 21 deletions(-) > > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c > index 1e97473af2..646fa31a59 100644 > --- a/target/riscv/cpu.c > +++ b/target/riscv/cpu.c > @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) > static void riscv_cpu_synchronize_from_tb(CPUState *cs, > const TranslationBlock *tb) > { > - RISCVCPU *cpu = RISCV_CPU(cs); > - CPURISCVState *env = &cpu->env; > - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); > + if (!(tb_cflags(tb) & CF_PCREL)) { > + RISCVCPU *cpu = RISCV_CPU(cs); > + CPURISCVState *env = &cpu->env; > + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); > > - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); > + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); > > - if (xl == MXL_RV32) { > - env->pc = (int32_t) tb->pc; > - } else { > - env->pc = tb->pc; > + if (xl == MXL_RV32) { > + env->pc = (int32_t) tb->pc; > + } else { > + env->pc = tb->pc; > + } > } > } > > @@ -693,11 +695,18 @@ static void riscv_restore_state_to_opc(CPUState *cs, > RISCVCPU *cpu = RISCV_CPU(cs); > CPURISCVState *env = &cpu->env; > RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); > + target_ulong pc; > + > + if (tb_cflags(tb) & CF_PCREL) { > + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; > + } else { > + pc = data[0]; > + } > > if (xl == MXL_RV32) { > - env->pc = (int32_t)data[0]; > + env->pc = (int32_t)pc; > } else { > - env->pc = data[0]; > + env->pc = pc; > } > env->bins = data[1]; > } > diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc > index cc72864d32..7cbbdac5aa 100644 > --- a/target/riscv/insn_trans/trans_rvi.c.inc > +++ b/target/riscv/insn_trans/trans_rvi.c.inc > @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) > > static bool trans_auipc(DisasContext *ctx, arg_auipc *a) > { > - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + TCGv target_pc = dest_gpr(ctx, a->rd); > + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); > + gen_set_gpr(ctx, a->rd, target_pc); > return true; > } > > @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) > { > TCGLabel *misaligned = NULL; > TCGv target_pc = tcg_temp_new(); > + TCGv succ_pc = dest_gpr(ctx, a->rd); > > tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), a->imm); > tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); > @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) > tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); > } > > - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); > + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); > + gen_set_gpr(ctx, a->rd, succ_pc); > + > tcg_gen_mov_tl(cpu_pc, target_pc); > lookup_and_goto_ptr(ctx); > > @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) > TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); > TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); > target_ulong next_pc; > + target_ulong orig_pc_save = ctx->pc_save; > > if (get_xl(ctx) == MXL_RV128) { > TCGv src1h = get_gprh(ctx, a->rs1); > @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) > > gen_set_label(l); /* branch taken */ > > + ctx->pc_save = orig_pc_save; > next_pc = ctx->base.pc_next + a->imm; > if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { > /* misaligned */ > @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) > gen_pc_plus_diff(target_pc, ctx, next_pc); > gen_exception_inst_addr_mis(ctx, target_pc); > } else { > - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); > + gen_goto_tb(ctx, 0, next_pc); > } > + ctx->pc_save = -1; > ctx->base.is_jmp = DISAS_NORETURN; > > return true; > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > index d434fedb37..4623749602 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -59,6 +59,7 @@ typedef struct DisasContext { > DisasContextBase base; > /* pc_succ_insn points to the instruction following base.pc_next */ > target_ulong pc_succ_insn; > + target_ulong pc_save; > target_ulong priv_ver; > RISCVMXL misa_mxl_max; > RISCVMXL xl; > @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) > static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, > target_ulong dest) > { > - if (get_xl(ctx) == MXL_RV32) { > - dest = (int32_t)dest; > + assert(ctx->pc_save != -1); > + if (tb_cflags(ctx->base.tb) & CF_PCREL) { > + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); > + if (get_xl(ctx) == MXL_RV32) { > + tcg_gen_ext32s_tl(target, target); > + } > + } else { > + if (get_xl(ctx) == MXL_RV32) { > + dest = (int32_t)dest; > + } > + tcg_gen_movi_tl(target, dest); > } > - tcg_gen_movi_tl(target, dest); > } > > static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) > { > gen_pc_plus_diff(cpu_pc, ctx, dest); > + ctx->pc_save = dest; > } > > static void generate_exception(DisasContext *ctx, int excp) > @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) > * direct block chain benefits will be small. > */ > if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { > - tcg_gen_goto_tb(n); > - gen_set_pc_imm(ctx, dest); > + /* > + * For pcrel, the pc must always be up-to-date on entry to > + * the linked TB, so that it can use simple additions for all > + * further adjustments. For !pcrel, the linked TB is compiled > + * to know its full virtual address, so we can delay the > + * update to pc to the unlinked path. A long chain of links > + * can thus avoid many updates to the PC. > + */ > + if (tb_cflags(ctx->base.tb) & CF_PCREL) { > + gen_set_pc_imm(ctx, dest); > + tcg_gen_goto_tb(n); > + } else { > + tcg_gen_goto_tb(n); > + gen_set_pc_imm(ctx, dest); > + } > tcg_gen_exit_tb(ctx->base.tb, n); > } else { > gen_set_pc_imm(ctx, dest); > @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t) > static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) > { > target_ulong next_pc; > + TCGv succ_pc = dest_gpr(ctx, rd); > > /* check misaligned: */ > next_pc = ctx->base.pc_next + imm; > @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) > } > } > > - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); > - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this for safety */ > + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); > + gen_set_gpr(ctx, rd, succ_pc); > + > + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ > ctx->base.is_jmp = DISAS_NORETURN; > } > > @@ -1150,6 +1176,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) > RISCVCPU *cpu = RISCV_CPU(cs); > uint32_t tb_flags = ctx->base.tb->flags; > > + ctx->pc_save = ctx->base.pc_first; > ctx->pc_succ_insn = ctx->base.pc_first; > ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); > ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; > @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) > static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) > { > DisasContext *ctx = container_of(dcbase, DisasContext, base); > + target_ulong pc_next = ctx->base.pc_next; > + > + if (tb_cflags(dcbase->tb) & CF_PCREL) { > + pc_next &= ~TARGET_PAGE_MASK; > + } > > - tcg_gen_insn_start(ctx->base.pc_next, 0); > + tcg_gen_insn_start(pc_next, 0); > ctx->insn_start = tcg_last_op(); > } >
On 2023/4/4 11:12, LIU Zhiwei wrote: > > On 2023/4/4 10:06, Weiwei Li wrote: >> Add a base pc_save for PC-relative translation(CF_PCREL). >> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. >> We can get pc-relative address from following formula: >> real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save. >> Use gen_get_target_pc to compute target address of auipc and successor >> address of jalr and jal. >> >> The existence of CF_PCREL can improve performance with the guest >> kernel's address space randomization. Each guest process maps libc.so >> (et al) at a different virtual address, and this allows those >> translations to be shared. >> >> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/riscv/cpu.c | 29 +++++++++------ >> target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- > > Miss the process for trans_ebreak. Please ignore this sentence comment. It will not influence the codegen. Zhiwei > > I want to construct the PCREL feature on the processing of ctx pc > related fields, which is the reason why we need do specially process. > For example, > > static bool trans_auipc(DisasContext *ctx, arg_auipc *a) > { > - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + if (tb_cflags(ctx->cflags) & CF_PCREL) { > + target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first > + a->imm; > + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); > + } else { > + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + } > return true; > } > > +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, > target_ulong rel) > +{ > + TCGv dest = dest_gpr(ctx, reg_num); > + tcg_gen_addi_tl(dest, t, rel); > + gen_set_gpr(ctx, reg_num, dest); > +} > + > > But if it is too difficult to reuse the current implementation, your > implementation is also acceptable to me. > > Zhiwei > >> target/riscv/translate.c | 48 ++++++++++++++++++++----- >> 3 files changed, 70 insertions(+), 21 deletions(-) >> >> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c >> index 1e97473af2..646fa31a59 100644 >> --- a/target/riscv/cpu.c >> +++ b/target/riscv/cpu.c >> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) >> static void riscv_cpu_synchronize_from_tb(CPUState *cs, >> const TranslationBlock *tb) >> { >> - RISCVCPU *cpu = RISCV_CPU(cs); >> - CPURISCVState *env = &cpu->env; >> - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> + if (!(tb_cflags(tb) & CF_PCREL)) { >> + RISCVCPU *cpu = RISCV_CPU(cs); >> + CPURISCVState *env = &cpu->env; >> + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >> + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >> - if (xl == MXL_RV32) { >> - env->pc = (int32_t) tb->pc; >> - } else { >> - env->pc = tb->pc; >> + if (xl == MXL_RV32) { >> + env->pc = (int32_t) tb->pc; >> + } else { >> + env->pc = tb->pc; >> + } >> } >> } >> @@ -693,11 +695,18 @@ static void >> riscv_restore_state_to_opc(CPUState *cs, >> RISCVCPU *cpu = RISCV_CPU(cs); >> CPURISCVState *env = &cpu->env; >> RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> + target_ulong pc; >> + >> + if (tb_cflags(tb) & CF_PCREL) { >> + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; >> + } else { >> + pc = data[0]; >> + } >> if (xl == MXL_RV32) { >> - env->pc = (int32_t)data[0]; >> + env->pc = (int32_t)pc; >> } else { >> - env->pc = data[0]; >> + env->pc = pc; >> } >> env->bins = data[1]; >> } >> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc >> b/target/riscv/insn_trans/trans_rvi.c.inc >> index cc72864d32..7cbbdac5aa 100644 >> --- a/target/riscv/insn_trans/trans_rvi.c.inc >> +++ b/target/riscv/insn_trans/trans_rvi.c.inc >> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) >> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >> { >> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >> + TCGv target_pc = dest_gpr(ctx, a->rd); >> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >> + gen_set_gpr(ctx, a->rd, target_pc); >> return true; >> } >> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, >> arg_jalr *a) >> { >> TCGLabel *misaligned = NULL; >> TCGv target_pc = tcg_temp_new(); >> + TCGv succ_pc = dest_gpr(ctx, a->rd); >> tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), >> a->imm); >> tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); >> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) >> tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); >> } >> - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); >> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >> + gen_set_gpr(ctx, a->rd, succ_pc); >> + >> tcg_gen_mov_tl(cpu_pc, target_pc); >> lookup_and_goto_ptr(ctx); >> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); >> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); >> target_ulong next_pc; >> + target_ulong orig_pc_save = ctx->pc_save; >> if (get_xl(ctx) == MXL_RV128) { >> TCGv src1h = get_gprh(ctx, a->rs1); >> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> gen_set_label(l); /* branch taken */ >> + ctx->pc_save = orig_pc_save; >> next_pc = ctx->base.pc_next + a->imm; >> if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { >> /* misaligned */ >> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> gen_pc_plus_diff(target_pc, ctx, next_pc); >> gen_exception_inst_addr_mis(ctx, target_pc); >> } else { >> - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); >> + gen_goto_tb(ctx, 0, next_pc); >> } >> + ctx->pc_save = -1; >> ctx->base.is_jmp = DISAS_NORETURN; >> return true; >> diff --git a/target/riscv/translate.c b/target/riscv/translate.c >> index d434fedb37..4623749602 100644 >> --- a/target/riscv/translate.c >> +++ b/target/riscv/translate.c >> @@ -59,6 +59,7 @@ typedef struct DisasContext { >> DisasContextBase base; >> /* pc_succ_insn points to the instruction following >> base.pc_next */ >> target_ulong pc_succ_insn; >> + target_ulong pc_save; >> target_ulong priv_ver; >> RISCVMXL misa_mxl_max; >> RISCVMXL xl; >> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) >> static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, >> target_ulong dest) >> { >> - if (get_xl(ctx) == MXL_RV32) { >> - dest = (int32_t)dest; >> + assert(ctx->pc_save != -1); >> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >> + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); >> + if (get_xl(ctx) == MXL_RV32) { >> + tcg_gen_ext32s_tl(target, target); >> + } >> + } else { >> + if (get_xl(ctx) == MXL_RV32) { >> + dest = (int32_t)dest; >> + } >> + tcg_gen_movi_tl(target, dest); >> } >> - tcg_gen_movi_tl(target, dest); >> } >> static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) >> { >> gen_pc_plus_diff(cpu_pc, ctx, dest); >> + ctx->pc_save = dest; >> } >> static void generate_exception(DisasContext *ctx, int excp) >> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int >> n, target_ulong dest) >> * direct block chain benefits will be small. >> */ >> if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { >> - tcg_gen_goto_tb(n); >> - gen_set_pc_imm(ctx, dest); >> + /* >> + * For pcrel, the pc must always be up-to-date on entry to >> + * the linked TB, so that it can use simple additions for all >> + * further adjustments. For !pcrel, the linked TB is compiled >> + * to know its full virtual address, so we can delay the >> + * update to pc to the unlinked path. A long chain of links >> + * can thus avoid many updates to the PC. >> + */ >> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >> + gen_set_pc_imm(ctx, dest); >> + tcg_gen_goto_tb(n); >> + } else { >> + tcg_gen_goto_tb(n); >> + gen_set_pc_imm(ctx, dest); >> + } >> tcg_gen_exit_tb(ctx->base.tb, n); >> } else { >> gen_set_pc_imm(ctx, dest); >> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int >> reg_num, TCGv_i64 t) >> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >> { >> target_ulong next_pc; >> + TCGv succ_pc = dest_gpr(ctx, rd); >> /* check misaligned: */ >> next_pc = ctx->base.pc_next + imm; >> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, >> target_ulong imm) >> } >> } >> - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); >> - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this >> for safety */ >> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >> + gen_set_gpr(ctx, rd, succ_pc); >> + >> + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ >> ctx->base.is_jmp = DISAS_NORETURN; >> } >> @@ -1150,6 +1176,7 @@ static void >> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >> RISCVCPU *cpu = RISCV_CPU(cs); >> uint32_t tb_flags = ctx->base.tb->flags; >> + ctx->pc_save = ctx->base.pc_first; >> ctx->pc_succ_insn = ctx->base.pc_first; >> ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); >> ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; >> @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase >> *db, CPUState *cpu) >> static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState >> *cpu) >> { >> DisasContext *ctx = container_of(dcbase, DisasContext, base); >> + target_ulong pc_next = ctx->base.pc_next; >> + >> + if (tb_cflags(dcbase->tb) & CF_PCREL) { >> + pc_next &= ~TARGET_PAGE_MASK; >> + } >> - tcg_gen_insn_start(ctx->base.pc_next, 0); >> + tcg_gen_insn_start(pc_next, 0); >> ctx->insn_start = tcg_last_op(); >> }
On 2023/4/4 11:12, LIU Zhiwei wrote: > > On 2023/4/4 10:06, Weiwei Li wrote: >> Add a base pc_save for PC-relative translation(CF_PCREL). >> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. >> We can get pc-relative address from following formula: >> real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save. >> Use gen_get_target_pc to compute target address of auipc and successor >> address of jalr and jal. >> >> The existence of CF_PCREL can improve performance with the guest >> kernel's address space randomization. Each guest process maps libc.so >> (et al) at a different virtual address, and this allows those >> translations to be shared. >> >> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/riscv/cpu.c | 29 +++++++++------ >> target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- > > Miss the process for trans_ebreak. > > I want to construct the PCREL feature on the processing of ctx pc > related fields, which is the reason why we need do specially process. > For example, > > static bool trans_auipc(DisasContext *ctx, arg_auipc *a) > { > - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + if (tb_cflags(ctx->cflags) & CF_PCREL) { > + target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first > + a->imm; > + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); > + } else { > + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + } > return true; > } > > +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, > target_ulong rel) > +{ > + TCGv dest = dest_gpr(ctx, reg_num); > + tcg_gen_addi_tl(dest, t, rel); > + gen_set_gpr(ctx, reg_num, dest); > +} > + > > But if it is too difficult to reuse the current implementation, your > implementation is also acceptable to me. Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above job. Regards, Weiwei Li > > Zhiwei > >> target/riscv/translate.c | 48 ++++++++++++++++++++----- >> 3 files changed, 70 insertions(+), 21 deletions(-) >> >> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c >> index 1e97473af2..646fa31a59 100644 >> --- a/target/riscv/cpu.c >> +++ b/target/riscv/cpu.c >> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) >> static void riscv_cpu_synchronize_from_tb(CPUState *cs, >> const TranslationBlock *tb) >> { >> - RISCVCPU *cpu = RISCV_CPU(cs); >> - CPURISCVState *env = &cpu->env; >> - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> + if (!(tb_cflags(tb) & CF_PCREL)) { >> + RISCVCPU *cpu = RISCV_CPU(cs); >> + CPURISCVState *env = &cpu->env; >> + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >> + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >> - if (xl == MXL_RV32) { >> - env->pc = (int32_t) tb->pc; >> - } else { >> - env->pc = tb->pc; >> + if (xl == MXL_RV32) { >> + env->pc = (int32_t) tb->pc; >> + } else { >> + env->pc = tb->pc; >> + } >> } >> } >> @@ -693,11 +695,18 @@ static void >> riscv_restore_state_to_opc(CPUState *cs, >> RISCVCPU *cpu = RISCV_CPU(cs); >> CPURISCVState *env = &cpu->env; >> RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >> + target_ulong pc; >> + >> + if (tb_cflags(tb) & CF_PCREL) { >> + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; >> + } else { >> + pc = data[0]; >> + } >> if (xl == MXL_RV32) { >> - env->pc = (int32_t)data[0]; >> + env->pc = (int32_t)pc; >> } else { >> - env->pc = data[0]; >> + env->pc = pc; >> } >> env->bins = data[1]; >> } >> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc >> b/target/riscv/insn_trans/trans_rvi.c.inc >> index cc72864d32..7cbbdac5aa 100644 >> --- a/target/riscv/insn_trans/trans_rvi.c.inc >> +++ b/target/riscv/insn_trans/trans_rvi.c.inc >> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) >> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >> { >> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >> + TCGv target_pc = dest_gpr(ctx, a->rd); >> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >> + gen_set_gpr(ctx, a->rd, target_pc); >> return true; >> } >> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, >> arg_jalr *a) >> { >> TCGLabel *misaligned = NULL; >> TCGv target_pc = tcg_temp_new(); >> + TCGv succ_pc = dest_gpr(ctx, a->rd); >> tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), >> a->imm); >> tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); >> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) >> tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); >> } >> - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); >> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >> + gen_set_gpr(ctx, a->rd, succ_pc); >> + >> tcg_gen_mov_tl(cpu_pc, target_pc); >> lookup_and_goto_ptr(ctx); >> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); >> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); >> target_ulong next_pc; >> + target_ulong orig_pc_save = ctx->pc_save; >> if (get_xl(ctx) == MXL_RV128) { >> TCGv src1h = get_gprh(ctx, a->rs1); >> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> gen_set_label(l); /* branch taken */ >> + ctx->pc_save = orig_pc_save; >> next_pc = ctx->base.pc_next + a->imm; >> if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { >> /* misaligned */ >> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b >> *a, TCGCond cond) >> gen_pc_plus_diff(target_pc, ctx, next_pc); >> gen_exception_inst_addr_mis(ctx, target_pc); >> } else { >> - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); >> + gen_goto_tb(ctx, 0, next_pc); >> } >> + ctx->pc_save = -1; >> ctx->base.is_jmp = DISAS_NORETURN; >> return true; >> diff --git a/target/riscv/translate.c b/target/riscv/translate.c >> index d434fedb37..4623749602 100644 >> --- a/target/riscv/translate.c >> +++ b/target/riscv/translate.c >> @@ -59,6 +59,7 @@ typedef struct DisasContext { >> DisasContextBase base; >> /* pc_succ_insn points to the instruction following >> base.pc_next */ >> target_ulong pc_succ_insn; >> + target_ulong pc_save; >> target_ulong priv_ver; >> RISCVMXL misa_mxl_max; >> RISCVMXL xl; >> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) >> static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, >> target_ulong dest) >> { >> - if (get_xl(ctx) == MXL_RV32) { >> - dest = (int32_t)dest; >> + assert(ctx->pc_save != -1); >> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >> + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); >> + if (get_xl(ctx) == MXL_RV32) { >> + tcg_gen_ext32s_tl(target, target); >> + } >> + } else { >> + if (get_xl(ctx) == MXL_RV32) { >> + dest = (int32_t)dest; >> + } >> + tcg_gen_movi_tl(target, dest); >> } >> - tcg_gen_movi_tl(target, dest); >> } >> static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) >> { >> gen_pc_plus_diff(cpu_pc, ctx, dest); >> + ctx->pc_save = dest; >> } >> static void generate_exception(DisasContext *ctx, int excp) >> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int >> n, target_ulong dest) >> * direct block chain benefits will be small. >> */ >> if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { >> - tcg_gen_goto_tb(n); >> - gen_set_pc_imm(ctx, dest); >> + /* >> + * For pcrel, the pc must always be up-to-date on entry to >> + * the linked TB, so that it can use simple additions for all >> + * further adjustments. For !pcrel, the linked TB is compiled >> + * to know its full virtual address, so we can delay the >> + * update to pc to the unlinked path. A long chain of links >> + * can thus avoid many updates to the PC. >> + */ >> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >> + gen_set_pc_imm(ctx, dest); >> + tcg_gen_goto_tb(n); >> + } else { >> + tcg_gen_goto_tb(n); >> + gen_set_pc_imm(ctx, dest); >> + } >> tcg_gen_exit_tb(ctx->base.tb, n); >> } else { >> gen_set_pc_imm(ctx, dest); >> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int >> reg_num, TCGv_i64 t) >> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >> { >> target_ulong next_pc; >> + TCGv succ_pc = dest_gpr(ctx, rd); >> /* check misaligned: */ >> next_pc = ctx->base.pc_next + imm; >> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, >> target_ulong imm) >> } >> } >> - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); >> - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this >> for safety */ >> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >> + gen_set_gpr(ctx, rd, succ_pc); >> + >> + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ >> ctx->base.is_jmp = DISAS_NORETURN; >> } >> @@ -1150,6 +1176,7 @@ static void >> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >> RISCVCPU *cpu = RISCV_CPU(cs); >> uint32_t tb_flags = ctx->base.tb->flags; >> + ctx->pc_save = ctx->base.pc_first; >> ctx->pc_succ_insn = ctx->base.pc_first; >> ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); >> ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; >> @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase >> *db, CPUState *cpu) >> static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState >> *cpu) >> { >> DisasContext *ctx = container_of(dcbase, DisasContext, base); >> + target_ulong pc_next = ctx->base.pc_next; >> + >> + if (tb_cflags(dcbase->tb) & CF_PCREL) { >> + pc_next &= ~TARGET_PAGE_MASK; >> + } >> - tcg_gen_insn_start(ctx->base.pc_next, 0); >> + tcg_gen_insn_start(pc_next, 0); >> ctx->insn_start = tcg_last_op(); >> }
On 2023/4/4 11:46, liweiwei wrote: > > On 2023/4/4 11:12, LIU Zhiwei wrote: >> >> On 2023/4/4 10:06, Weiwei Li wrote: >>> Add a base pc_save for PC-relative translation(CF_PCREL). >>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. >>> We can get pc-relative address from following formula: >>> real_pc = (old)env->pc + diff, where diff = target_pc - >>> ctx->pc_save. >>> Use gen_get_target_pc to compute target address of auipc and successor >>> address of jalr and jal. >>> >>> The existence of CF_PCREL can improve performance with the guest >>> kernel's address space randomization. Each guest process maps libc.so >>> (et al) at a different virtual address, and this allows those >>> translations to be shared. >>> >>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >>> --- >>> target/riscv/cpu.c | 29 +++++++++------ >>> target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- >> >> Miss the process for trans_ebreak. >> >> I want to construct the PCREL feature on the processing of ctx pc >> related fields, which is the reason why we need do specially process. >> For example, >> >> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >> { >> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >> + if (tb_cflags(ctx->cflags) & CF_PCREL) { >> + target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first >> + a->imm; >> + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); >> + } else { >> + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >> + } >> return true; >> } >> >> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv >> t, target_ulong rel) >> +{ >> + TCGv dest = dest_gpr(ctx, reg_num); >> + tcg_gen_addi_tl(dest, t, rel); >> + gen_set_gpr(ctx, reg_num, dest); >> +} >> + >> >> But if it is too difficult to reuse the current implementation, your >> implementation is also acceptable to me. > > Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above > job. Yes, I think so. I just suspect whether it is easy to read and verify the correctness. And the maintenance for the future. 1) Maybe we should split the PCREL to a split patch set, as it is a new feature. The point masking can still use this thread. 2) For the new patch set for PCREL, process where we need to modify one by one. One clue for recognize where to modify is the ctx pc related fields, such as pc_next/pc_first/succ_insn_pc. One thing may worth to try is that don't change the code in insn_trans/trans_X. Just rename the origin API we need to modify to a new name with _abs suffix. And and a correspond set of API for PCREL with _pcrel suffix. For example, in DisasContext, we define void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm); In disas_init_fn, if (tb_cflags(tb) & CF_PCREL) { gen_set_gpri = gen_set_gpri_pcrel; } else { gen_set_gpri = gen_set_gpri_abs; } Thus we can write the code in trans_insn without think about the PCREL. Thanks, Zhiwei > > Regards, > > Weiwei Li > >> >> Zhiwei >> >>> target/riscv/translate.c | 48 ++++++++++++++++++++----- >>> 3 files changed, 70 insertions(+), 21 deletions(-) >>> >>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c >>> index 1e97473af2..646fa31a59 100644 >>> --- a/target/riscv/cpu.c >>> +++ b/target/riscv/cpu.c >>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) >>> static void riscv_cpu_synchronize_from_tb(CPUState *cs, >>> const TranslationBlock *tb) >>> { >>> - RISCVCPU *cpu = RISCV_CPU(cs); >>> - CPURISCVState *env = &cpu->env; >>> - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>> + if (!(tb_cflags(tb) & CF_PCREL)) { >>> + RISCVCPU *cpu = RISCV_CPU(cs); >>> + CPURISCVState *env = &cpu->env; >>> + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>> - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>> + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>> - if (xl == MXL_RV32) { >>> - env->pc = (int32_t) tb->pc; >>> - } else { >>> - env->pc = tb->pc; >>> + if (xl == MXL_RV32) { >>> + env->pc = (int32_t) tb->pc; >>> + } else { >>> + env->pc = tb->pc; >>> + } >>> } >>> } >>> @@ -693,11 +695,18 @@ static void >>> riscv_restore_state_to_opc(CPUState *cs, >>> RISCVCPU *cpu = RISCV_CPU(cs); >>> CPURISCVState *env = &cpu->env; >>> RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>> + target_ulong pc; >>> + >>> + if (tb_cflags(tb) & CF_PCREL) { >>> + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; >>> + } else { >>> + pc = data[0]; >>> + } >>> if (xl == MXL_RV32) { >>> - env->pc = (int32_t)data[0]; >>> + env->pc = (int32_t)pc; >>> } else { >>> - env->pc = data[0]; >>> + env->pc = pc; >>> } >>> env->bins = data[1]; >>> } >>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc >>> b/target/riscv/insn_trans/trans_rvi.c.inc >>> index cc72864d32..7cbbdac5aa 100644 >>> --- a/target/riscv/insn_trans/trans_rvi.c.inc >>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc >>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) >>> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >>> { >>> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>> + TCGv target_pc = dest_gpr(ctx, a->rd); >>> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >>> + gen_set_gpr(ctx, a->rd, target_pc); >>> return true; >>> } >>> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, >>> arg_jalr *a) >>> { >>> TCGLabel *misaligned = NULL; >>> TCGv target_pc = tcg_temp_new(); >>> + TCGv succ_pc = dest_gpr(ctx, a->rd); >>> tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), >>> a->imm); >>> tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); >>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr >>> *a) >>> tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); >>> } >>> - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); >>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>> + gen_set_gpr(ctx, a->rd, succ_pc); >>> + >>> tcg_gen_mov_tl(cpu_pc, target_pc); >>> lookup_and_goto_ptr(ctx); >>> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, >>> arg_b *a, TCGCond cond) >>> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); >>> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); >>> target_ulong next_pc; >>> + target_ulong orig_pc_save = ctx->pc_save; >>> if (get_xl(ctx) == MXL_RV128) { >>> TCGv src1h = get_gprh(ctx, a->rs1); >>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >>> *a, TCGCond cond) >>> gen_set_label(l); /* branch taken */ >>> + ctx->pc_save = orig_pc_save; >>> next_pc = ctx->base.pc_next + a->imm; >>> if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { >>> /* misaligned */ >>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b >>> *a, TCGCond cond) >>> gen_pc_plus_diff(target_pc, ctx, next_pc); >>> gen_exception_inst_addr_mis(ctx, target_pc); >>> } else { >>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); >>> + gen_goto_tb(ctx, 0, next_pc); >>> } >>> + ctx->pc_save = -1; >>> ctx->base.is_jmp = DISAS_NORETURN; >>> return true; >>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c >>> index d434fedb37..4623749602 100644 >>> --- a/target/riscv/translate.c >>> +++ b/target/riscv/translate.c >>> @@ -59,6 +59,7 @@ typedef struct DisasContext { >>> DisasContextBase base; >>> /* pc_succ_insn points to the instruction following >>> base.pc_next */ >>> target_ulong pc_succ_insn; >>> + target_ulong pc_save; >>> target_ulong priv_ver; >>> RISCVMXL misa_mxl_max; >>> RISCVMXL xl; >>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) >>> static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, >>> target_ulong dest) >>> { >>> - if (get_xl(ctx) == MXL_RV32) { >>> - dest = (int32_t)dest; >>> + assert(ctx->pc_save != -1); >>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>> + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); >>> + if (get_xl(ctx) == MXL_RV32) { >>> + tcg_gen_ext32s_tl(target, target); >>> + } >>> + } else { >>> + if (get_xl(ctx) == MXL_RV32) { >>> + dest = (int32_t)dest; >>> + } >>> + tcg_gen_movi_tl(target, dest); >>> } >>> - tcg_gen_movi_tl(target, dest); >>> } >>> static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) >>> { >>> gen_pc_plus_diff(cpu_pc, ctx, dest); >>> + ctx->pc_save = dest; >>> } >>> static void generate_exception(DisasContext *ctx, int excp) >>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int >>> n, target_ulong dest) >>> * direct block chain benefits will be small. >>> */ >>> if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { >>> - tcg_gen_goto_tb(n); >>> - gen_set_pc_imm(ctx, dest); >>> + /* >>> + * For pcrel, the pc must always be up-to-date on entry to >>> + * the linked TB, so that it can use simple additions for all >>> + * further adjustments. For !pcrel, the linked TB is compiled >>> + * to know its full virtual address, so we can delay the >>> + * update to pc to the unlinked path. A long chain of links >>> + * can thus avoid many updates to the PC. >>> + */ >>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>> + gen_set_pc_imm(ctx, dest); >>> + tcg_gen_goto_tb(n); >>> + } else { >>> + tcg_gen_goto_tb(n); >>> + gen_set_pc_imm(ctx, dest); >>> + } >>> tcg_gen_exit_tb(ctx->base.tb, n); >>> } else { >>> gen_set_pc_imm(ctx, dest); >>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int >>> reg_num, TCGv_i64 t) >>> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >>> { >>> target_ulong next_pc; >>> + TCGv succ_pc = dest_gpr(ctx, rd); >>> /* check misaligned: */ >>> next_pc = ctx->base.pc_next + imm; >>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, >>> target_ulong imm) >>> } >>> } >>> - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); >>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this >>> for safety */ >>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>> + gen_set_gpr(ctx, rd, succ_pc); >>> + >>> + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ >>> ctx->base.is_jmp = DISAS_NORETURN; >>> } >>> @@ -1150,6 +1176,7 @@ static void >>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >>> RISCVCPU *cpu = RISCV_CPU(cs); >>> uint32_t tb_flags = ctx->base.tb->flags; >>> + ctx->pc_save = ctx->base.pc_first; >>> ctx->pc_succ_insn = ctx->base.pc_first; >>> ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); >>> ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; >>> @@ -1195,8 +1222,13 @@ static void >>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) >>> static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState >>> *cpu) >>> { >>> DisasContext *ctx = container_of(dcbase, DisasContext, base); >>> + target_ulong pc_next = ctx->base.pc_next; >>> + >>> + if (tb_cflags(dcbase->tb) & CF_PCREL) { >>> + pc_next &= ~TARGET_PAGE_MASK; >>> + } >>> - tcg_gen_insn_start(ctx->base.pc_next, 0); >>> + tcg_gen_insn_start(pc_next, 0); >>> ctx->insn_start = tcg_last_op(); >>> }
On 2023/4/4 15:07, LIU Zhiwei wrote: > > On 2023/4/4 11:46, liweiwei wrote: >> >> On 2023/4/4 11:12, LIU Zhiwei wrote: >>> >>> On 2023/4/4 10:06, Weiwei Li wrote: >>>> Add a base pc_save for PC-relative translation(CF_PCREL). >>>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. >>>> We can get pc-relative address from following formula: >>>> real_pc = (old)env->pc + diff, where diff = target_pc - >>>> ctx->pc_save. >>>> Use gen_get_target_pc to compute target address of auipc and successor >>>> address of jalr and jal. >>>> >>>> The existence of CF_PCREL can improve performance with the guest >>>> kernel's address space randomization. Each guest process maps libc.so >>>> (et al) at a different virtual address, and this allows those >>>> translations to be shared. >>>> >>>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >>>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >>>> --- >>>> target/riscv/cpu.c | 29 +++++++++------ >>>> target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- >>> >>> Miss the process for trans_ebreak. >>> >>> I want to construct the PCREL feature on the processing of ctx pc >>> related fields, which is the reason why we need do specially >>> process. For example, >>> >>> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >>> { >>> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>> + if (tb_cflags(ctx->cflags) & CF_PCREL) { >>> + target_ulong pc_rel = ctx->base.pc_next - >>> ctx->base.pc_first + a->imm; >>> + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); >>> + } else { >>> + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>> + } >>> return true; >>> } >>> >>> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv >>> t, target_ulong rel) >>> +{ >>> + TCGv dest = dest_gpr(ctx, reg_num); >>> + tcg_gen_addi_tl(dest, t, rel); >>> + gen_set_gpr(ctx, reg_num, dest); >>> +} >>> + >>> >>> But if it is too difficult to reuse the current implementation, your >>> implementation is also acceptable to me. >> >> Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above >> job. > > Yes, I think so. I just suspect whether it is easy to read and verify > the correctness. And the maintenance for the future. > > > 1) Maybe we should split the PCREL to a split patch set, as it is a > new feature. The point masking can still use this thread. Point mask for instruction relies on PCREL. That's why I introduce PCREL in this patchset. Maybe we can divide this patch if needed. > > > 2) For the new patch set for PCREL, process where we need to modify > one by one. One clue for recognize where to modify is the ctx pc > related fields, such as pc_next/pc_first/succ_insn_pc. > > One thing may worth to try is that don't change the code in > insn_trans/trans_X. Just rename the origin API we need to modify to a > new name with _abs suffix. And and a correspond set of API for PCREL > with _pcrel suffix. > I don't find a good way to remain trans_* unchanged to support PCREL. > For example, in DisasContext, we define > > void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm); > > In disas_init_fn, > > if (tb_cflags(tb) & CF_PCREL) { > gen_set_gpri = gen_set_gpri_pcrel; > } else { > gen_set_gpri = gen_set_gpri_abs; > } > Thus we can write the code in trans_insn without think about the PCREL. That's what I want. And PCREL also have been avoided in following code of trans_*. However, we should't update pc_next/pc_succ_insn related address into register directly by reusing existed API like gen_set_gpri. It's a general API to set gpr to any imm. However PC-relative only works for pc-related imm. Maybe we can introduce a new API like gen_set_gpr_pci() to set pc related imm. However it seems unnecessary, because it's no difference from current logic by using gen_pc_plus_diff() from the developer hand. Regards, Weiwei Li > > Thanks, > Zhiwei > >> >> Regards, >> >> Weiwei Li >> >>> >>> Zhiwei >>> >>>> target/riscv/translate.c | 48 ++++++++++++++++++++----- >>>> 3 files changed, 70 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c >>>> index 1e97473af2..646fa31a59 100644 >>>> --- a/target/riscv/cpu.c >>>> +++ b/target/riscv/cpu.c >>>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) >>>> static void riscv_cpu_synchronize_from_tb(CPUState *cs, >>>> const TranslationBlock >>>> *tb) >>>> { >>>> - RISCVCPU *cpu = RISCV_CPU(cs); >>>> - CPURISCVState *env = &cpu->env; >>>> - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>> + if (!(tb_cflags(tb) & CF_PCREL)) { >>>> + RISCVCPU *cpu = RISCV_CPU(cs); >>>> + CPURISCVState *env = &cpu->env; >>>> + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>> - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>>> + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>>> - if (xl == MXL_RV32) { >>>> - env->pc = (int32_t) tb->pc; >>>> - } else { >>>> - env->pc = tb->pc; >>>> + if (xl == MXL_RV32) { >>>> + env->pc = (int32_t) tb->pc; >>>> + } else { >>>> + env->pc = tb->pc; >>>> + } >>>> } >>>> } >>>> @@ -693,11 +695,18 @@ static void >>>> riscv_restore_state_to_opc(CPUState *cs, >>>> RISCVCPU *cpu = RISCV_CPU(cs); >>>> CPURISCVState *env = &cpu->env; >>>> RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>> + target_ulong pc; >>>> + >>>> + if (tb_cflags(tb) & CF_PCREL) { >>>> + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; >>>> + } else { >>>> + pc = data[0]; >>>> + } >>>> if (xl == MXL_RV32) { >>>> - env->pc = (int32_t)data[0]; >>>> + env->pc = (int32_t)pc; >>>> } else { >>>> - env->pc = data[0]; >>>> + env->pc = pc; >>>> } >>>> env->bins = data[1]; >>>> } >>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc >>>> b/target/riscv/insn_trans/trans_rvi.c.inc >>>> index cc72864d32..7cbbdac5aa 100644 >>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc >>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc >>>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) >>>> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >>>> { >>>> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>>> + TCGv target_pc = dest_gpr(ctx, a->rd); >>>> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >>>> + gen_set_gpr(ctx, a->rd, target_pc); >>>> return true; >>>> } >>>> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, >>>> arg_jalr *a) >>>> { >>>> TCGLabel *misaligned = NULL; >>>> TCGv target_pc = tcg_temp_new(); >>>> + TCGv succ_pc = dest_gpr(ctx, a->rd); >>>> tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), >>>> a->imm); >>>> tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); >>>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, >>>> arg_jalr *a) >>>> tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); >>>> } >>>> - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); >>>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>>> + gen_set_gpr(ctx, a->rd, succ_pc); >>>> + >>>> tcg_gen_mov_tl(cpu_pc, target_pc); >>>> lookup_and_goto_ptr(ctx); >>>> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, >>>> arg_b *a, TCGCond cond) >>>> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); >>>> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); >>>> target_ulong next_pc; >>>> + target_ulong orig_pc_save = ctx->pc_save; >>>> if (get_xl(ctx) == MXL_RV128) { >>>> TCGv src1h = get_gprh(ctx, a->rs1); >>>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b >>>> *a, TCGCond cond) >>>> gen_set_label(l); /* branch taken */ >>>> + ctx->pc_save = orig_pc_save; >>>> next_pc = ctx->base.pc_next + a->imm; >>>> if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { >>>> /* misaligned */ >>>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b >>>> *a, TCGCond cond) >>>> gen_pc_plus_diff(target_pc, ctx, next_pc); >>>> gen_exception_inst_addr_mis(ctx, target_pc); >>>> } else { >>>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); >>>> + gen_goto_tb(ctx, 0, next_pc); >>>> } >>>> + ctx->pc_save = -1; >>>> ctx->base.is_jmp = DISAS_NORETURN; >>>> return true; >>>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c >>>> index d434fedb37..4623749602 100644 >>>> --- a/target/riscv/translate.c >>>> +++ b/target/riscv/translate.c >>>> @@ -59,6 +59,7 @@ typedef struct DisasContext { >>>> DisasContextBase base; >>>> /* pc_succ_insn points to the instruction following >>>> base.pc_next */ >>>> target_ulong pc_succ_insn; >>>> + target_ulong pc_save; >>>> target_ulong priv_ver; >>>> RISCVMXL misa_mxl_max; >>>> RISCVMXL xl; >>>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) >>>> static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, >>>> target_ulong dest) >>>> { >>>> - if (get_xl(ctx) == MXL_RV32) { >>>> - dest = (int32_t)dest; >>>> + assert(ctx->pc_save != -1); >>>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>>> + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); >>>> + if (get_xl(ctx) == MXL_RV32) { >>>> + tcg_gen_ext32s_tl(target, target); >>>> + } >>>> + } else { >>>> + if (get_xl(ctx) == MXL_RV32) { >>>> + dest = (int32_t)dest; >>>> + } >>>> + tcg_gen_movi_tl(target, dest); >>>> } >>>> - tcg_gen_movi_tl(target, dest); >>>> } >>>> static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) >>>> { >>>> gen_pc_plus_diff(cpu_pc, ctx, dest); >>>> + ctx->pc_save = dest; >>>> } >>>> static void generate_exception(DisasContext *ctx, int excp) >>>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int >>>> n, target_ulong dest) >>>> * direct block chain benefits will be small. >>>> */ >>>> if (translator_use_goto_tb(&ctx->base, dest) && >>>> !ctx->itrigger) { >>>> - tcg_gen_goto_tb(n); >>>> - gen_set_pc_imm(ctx, dest); >>>> + /* >>>> + * For pcrel, the pc must always be up-to-date on entry to >>>> + * the linked TB, so that it can use simple additions for all >>>> + * further adjustments. For !pcrel, the linked TB is >>>> compiled >>>> + * to know its full virtual address, so we can delay the >>>> + * update to pc to the unlinked path. A long chain of links >>>> + * can thus avoid many updates to the PC. >>>> + */ >>>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>>> + gen_set_pc_imm(ctx, dest); >>>> + tcg_gen_goto_tb(n); >>>> + } else { >>>> + tcg_gen_goto_tb(n); >>>> + gen_set_pc_imm(ctx, dest); >>>> + } >>>> tcg_gen_exit_tb(ctx->base.tb, n); >>>> } else { >>>> gen_set_pc_imm(ctx, dest); >>>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, >>>> int reg_num, TCGv_i64 t) >>>> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >>>> { >>>> target_ulong next_pc; >>>> + TCGv succ_pc = dest_gpr(ctx, rd); >>>> /* check misaligned: */ >>>> next_pc = ctx->base.pc_next + imm; >>>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, >>>> target_ulong imm) >>>> } >>>> } >>>> - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); >>>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this >>>> for safety */ >>>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>>> + gen_set_gpr(ctx, rd, succ_pc); >>>> + >>>> + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ >>>> ctx->base.is_jmp = DISAS_NORETURN; >>>> } >>>> @@ -1150,6 +1176,7 @@ static void >>>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >>>> RISCVCPU *cpu = RISCV_CPU(cs); >>>> uint32_t tb_flags = ctx->base.tb->flags; >>>> + ctx->pc_save = ctx->base.pc_first; >>>> ctx->pc_succ_insn = ctx->base.pc_first; >>>> ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); >>>> ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; >>>> @@ -1195,8 +1222,13 @@ static void >>>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) >>>> static void riscv_tr_insn_start(DisasContextBase *dcbase, >>>> CPUState *cpu) >>>> { >>>> DisasContext *ctx = container_of(dcbase, DisasContext, base); >>>> + target_ulong pc_next = ctx->base.pc_next; >>>> + >>>> + if (tb_cflags(dcbase->tb) & CF_PCREL) { >>>> + pc_next &= ~TARGET_PAGE_MASK; >>>> + } >>>> - tcg_gen_insn_start(ctx->base.pc_next, 0); >>>> + tcg_gen_insn_start(pc_next, 0); >>>> ctx->insn_start = tcg_last_op(); >>>> }
On 2023/4/4 16:48, liweiwei wrote: > > On 2023/4/4 15:07, LIU Zhiwei wrote: >> >> On 2023/4/4 11:46, liweiwei wrote: >>> >>> On 2023/4/4 11:12, LIU Zhiwei wrote: >>>> >>>> On 2023/4/4 10:06, Weiwei Li wrote: >>>>> Add a base pc_save for PC-relative translation(CF_PCREL). >>>>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb. >>>>> We can get pc-relative address from following formula: >>>>> real_pc = (old)env->pc + diff, where diff = target_pc - >>>>> ctx->pc_save. >>>>> Use gen_get_target_pc to compute target address of auipc and >>>>> successor >>>>> address of jalr and jal. >>>>> >>>>> The existence of CF_PCREL can improve performance with the guest >>>>> kernel's address space randomization. Each guest process maps >>>>> libc.so >>>>> (et al) at a different virtual address, and this allows those >>>>> translations to be shared. >>>>> >>>>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >>>>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> >>>>> --- >>>>> target/riscv/cpu.c | 29 +++++++++------ >>>>> target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++-- >>>> >>>> Miss the process for trans_ebreak. >>>> >>>> I want to construct the PCREL feature on the processing of ctx pc >>>> related fields, which is the reason why we need do specially >>>> process. For example, >>>> >>>> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >>>> { >>>> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>>> + if (tb_cflags(ctx->cflags) & CF_PCREL) { >>>> + target_ulong pc_rel = ctx->base.pc_next - >>>> ctx->base.pc_first + a->imm; >>>> + gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel); >>>> + } else { >>>> + gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>>> + } >>>> return true; >>>> } >>>> >>>> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv >>>> t, target_ulong rel) >>>> +{ >>>> + TCGv dest = dest_gpr(ctx, reg_num); >>>> + tcg_gen_addi_tl(dest, t, rel); >>>> + gen_set_gpr(ctx, reg_num, dest); >>>> +} >>>> + >>>> >>>> But if it is too difficult to reuse the current implementation, >>>> your implementation is also acceptable to me. >>> >>> Sorry, I don't get your idea. gen_pc_plus_diff() can do all the >>> above job. >> >> Yes, I think so. I just suspect whether it is easy to read and verify >> the correctness. And the maintenance for the future. >> >> >> 1) Maybe we should split the PCREL to a split patch set, as it is a >> new feature. The point masking can still use this thread. > > Point mask for instruction relies on PCREL. That's why I introduce > PCREL in this patchset. > > Maybe we can divide this patch if needed. If we won't use another way to rewrite the PCREL, we don't have to split it. > >> >> >> 2) For the new patch set for PCREL, process where we need to modify >> one by one. One clue for recognize where to modify is the ctx pc >> related fields, such as pc_next/pc_first/succ_insn_pc. >> >> One thing may worth to try is that don't change the code in >> insn_trans/trans_X. Just rename the origin API we need to modify to >> a new name with _abs suffix. And and a correspond set of API for >> PCREL with _pcrel suffix. >> > I don't find a good way to remain trans_* unchanged to support PCREL. >> For example, in DisasContext, we define >> >> void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm); >> >> In disas_init_fn, >> >> if (tb_cflags(tb) & CF_PCREL) { >> gen_set_gpri = gen_set_gpri_pcrel; >> } else { >> gen_set_gpri = gen_set_gpri_abs; >> } Thus we can write the code in trans_insn without think about the >> PCREL. > > That's what I want. And PCREL also have been avoided in following > code of trans_*. > > However, we should't update pc_next/pc_succ_insn related address into > register directly by reusing existed API like gen_set_gpri. > > It's a general API to set gpr to any imm. However PC-relative only > works for pc-related imm. > > Maybe we can introduce a new API like gen_set_gpr_pci() to set pc > related imm. Yes, I think so, except _pci is not a good suffix. But I don't insist on this way. Zhiwei > > However it seems unnecessary, because it's no difference from current > logic by using gen_pc_plus_diff() from the developer hand. > > Regards, > > Weiwei Li > >> >> Thanks, >> Zhiwei >> >>> >>> Regards, >>> >>> Weiwei Li >>> >>>> >>>> Zhiwei >>>> >>>>> target/riscv/translate.c | 48 ++++++++++++++++++++----- >>>>> 3 files changed, 70 insertions(+), 21 deletions(-) >>>>> >>>>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c >>>>> index 1e97473af2..646fa31a59 100644 >>>>> --- a/target/riscv/cpu.c >>>>> +++ b/target/riscv/cpu.c >>>>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) >>>>> static void riscv_cpu_synchronize_from_tb(CPUState *cs, >>>>> const TranslationBlock >>>>> *tb) >>>>> { >>>>> - RISCVCPU *cpu = RISCV_CPU(cs); >>>>> - CPURISCVState *env = &cpu->env; >>>>> - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>>> + if (!(tb_cflags(tb) & CF_PCREL)) { >>>>> + RISCVCPU *cpu = RISCV_CPU(cs); >>>>> + CPURISCVState *env = &cpu->env; >>>>> + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>>> - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>>>> + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); >>>>> - if (xl == MXL_RV32) { >>>>> - env->pc = (int32_t) tb->pc; >>>>> - } else { >>>>> - env->pc = tb->pc; >>>>> + if (xl == MXL_RV32) { >>>>> + env->pc = (int32_t) tb->pc; >>>>> + } else { >>>>> + env->pc = tb->pc; >>>>> + } >>>>> } >>>>> } >>>>> @@ -693,11 +695,18 @@ static void >>>>> riscv_restore_state_to_opc(CPUState *cs, >>>>> RISCVCPU *cpu = RISCV_CPU(cs); >>>>> CPURISCVState *env = &cpu->env; >>>>> RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); >>>>> + target_ulong pc; >>>>> + >>>>> + if (tb_cflags(tb) & CF_PCREL) { >>>>> + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; >>>>> + } else { >>>>> + pc = data[0]; >>>>> + } >>>>> if (xl == MXL_RV32) { >>>>> - env->pc = (int32_t)data[0]; >>>>> + env->pc = (int32_t)pc; >>>>> } else { >>>>> - env->pc = data[0]; >>>>> + env->pc = pc; >>>>> } >>>>> env->bins = data[1]; >>>>> } >>>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc >>>>> b/target/riscv/insn_trans/trans_rvi.c.inc >>>>> index cc72864d32..7cbbdac5aa 100644 >>>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc >>>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc >>>>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui >>>>> *a) >>>>> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >>>>> { >>>>> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >>>>> + TCGv target_pc = dest_gpr(ctx, a->rd); >>>>> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >>>>> + gen_set_gpr(ctx, a->rd, target_pc); >>>>> return true; >>>>> } >>>>> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, >>>>> arg_jalr *a) >>>>> { >>>>> TCGLabel *misaligned = NULL; >>>>> TCGv target_pc = tcg_temp_new(); >>>>> + TCGv succ_pc = dest_gpr(ctx, a->rd); >>>>> tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), >>>>> a->imm); >>>>> tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); >>>>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, >>>>> arg_jalr *a) >>>>> tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); >>>>> } >>>>> - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); >>>>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>>>> + gen_set_gpr(ctx, a->rd, succ_pc); >>>>> + >>>>> tcg_gen_mov_tl(cpu_pc, target_pc); >>>>> lookup_and_goto_ptr(ctx); >>>>> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, >>>>> arg_b *a, TCGCond cond) >>>>> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); >>>>> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); >>>>> target_ulong next_pc; >>>>> + target_ulong orig_pc_save = ctx->pc_save; >>>>> if (get_xl(ctx) == MXL_RV128) { >>>>> TCGv src1h = get_gprh(ctx, a->rs1); >>>>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, >>>>> arg_b *a, TCGCond cond) >>>>> gen_set_label(l); /* branch taken */ >>>>> + ctx->pc_save = orig_pc_save; >>>>> next_pc = ctx->base.pc_next + a->imm; >>>>> if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { >>>>> /* misaligned */ >>>>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, >>>>> arg_b *a, TCGCond cond) >>>>> gen_pc_plus_diff(target_pc, ctx, next_pc); >>>>> gen_exception_inst_addr_mis(ctx, target_pc); >>>>> } else { >>>>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); >>>>> + gen_goto_tb(ctx, 0, next_pc); >>>>> } >>>>> + ctx->pc_save = -1; >>>>> ctx->base.is_jmp = DISAS_NORETURN; >>>>> return true; >>>>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c >>>>> index d434fedb37..4623749602 100644 >>>>> --- a/target/riscv/translate.c >>>>> +++ b/target/riscv/translate.c >>>>> @@ -59,6 +59,7 @@ typedef struct DisasContext { >>>>> DisasContextBase base; >>>>> /* pc_succ_insn points to the instruction following >>>>> base.pc_next */ >>>>> target_ulong pc_succ_insn; >>>>> + target_ulong pc_save; >>>>> target_ulong priv_ver; >>>>> RISCVMXL misa_mxl_max; >>>>> RISCVMXL xl; >>>>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) >>>>> static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, >>>>> target_ulong dest) >>>>> { >>>>> - if (get_xl(ctx) == MXL_RV32) { >>>>> - dest = (int32_t)dest; >>>>> + assert(ctx->pc_save != -1); >>>>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>>>> + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); >>>>> + if (get_xl(ctx) == MXL_RV32) { >>>>> + tcg_gen_ext32s_tl(target, target); >>>>> + } >>>>> + } else { >>>>> + if (get_xl(ctx) == MXL_RV32) { >>>>> + dest = (int32_t)dest; >>>>> + } >>>>> + tcg_gen_movi_tl(target, dest); >>>>> } >>>>> - tcg_gen_movi_tl(target, dest); >>>>> } >>>>> static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) >>>>> { >>>>> gen_pc_plus_diff(cpu_pc, ctx, dest); >>>>> + ctx->pc_save = dest; >>>>> } >>>>> static void generate_exception(DisasContext *ctx, int excp) >>>>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, >>>>> int n, target_ulong dest) >>>>> * direct block chain benefits will be small. >>>>> */ >>>>> if (translator_use_goto_tb(&ctx->base, dest) && >>>>> !ctx->itrigger) { >>>>> - tcg_gen_goto_tb(n); >>>>> - gen_set_pc_imm(ctx, dest); >>>>> + /* >>>>> + * For pcrel, the pc must always be up-to-date on entry to >>>>> + * the linked TB, so that it can use simple additions for >>>>> all >>>>> + * further adjustments. For !pcrel, the linked TB is >>>>> compiled >>>>> + * to know its full virtual address, so we can delay the >>>>> + * update to pc to the unlinked path. A long chain of links >>>>> + * can thus avoid many updates to the PC. >>>>> + */ >>>>> + if (tb_cflags(ctx->base.tb) & CF_PCREL) { >>>>> + gen_set_pc_imm(ctx, dest); >>>>> + tcg_gen_goto_tb(n); >>>>> + } else { >>>>> + tcg_gen_goto_tb(n); >>>>> + gen_set_pc_imm(ctx, dest); >>>>> + } >>>>> tcg_gen_exit_tb(ctx->base.tb, n); >>>>> } else { >>>>> gen_set_pc_imm(ctx, dest); >>>>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, >>>>> int reg_num, TCGv_i64 t) >>>>> static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) >>>>> { >>>>> target_ulong next_pc; >>>>> + TCGv succ_pc = dest_gpr(ctx, rd); >>>>> /* check misaligned: */ >>>>> next_pc = ctx->base.pc_next + imm; >>>>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int >>>>> rd, target_ulong imm) >>>>> } >>>>> } >>>>> - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); >>>>> - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use >>>>> this for safety */ >>>>> + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); >>>>> + gen_set_gpr(ctx, rd, succ_pc); >>>>> + >>>>> + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ >>>>> ctx->base.is_jmp = DISAS_NORETURN; >>>>> } >>>>> @@ -1150,6 +1176,7 @@ static void >>>>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) >>>>> RISCVCPU *cpu = RISCV_CPU(cs); >>>>> uint32_t tb_flags = ctx->base.tb->flags; >>>>> + ctx->pc_save = ctx->base.pc_first; >>>>> ctx->pc_succ_insn = ctx->base.pc_first; >>>>> ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); >>>>> ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; >>>>> @@ -1195,8 +1222,13 @@ static void >>>>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) >>>>> static void riscv_tr_insn_start(DisasContextBase *dcbase, >>>>> CPUState *cpu) >>>>> { >>>>> DisasContext *ctx = container_of(dcbase, DisasContext, base); >>>>> + target_ulong pc_next = ctx->base.pc_next; >>>>> + >>>>> + if (tb_cflags(dcbase->tb) & CF_PCREL) { >>>>> + pc_next &= ~TARGET_PAGE_MASK; >>>>> + } >>>>> - tcg_gen_insn_start(ctx->base.pc_next, 0); >>>>> + tcg_gen_insn_start(pc_next, 0); >>>>> ctx->insn_start = tcg_last_op(); >>>>> }
On 4/3/23 19:06, Weiwei Li wrote: > static bool trans_auipc(DisasContext *ctx, arg_auipc *a) > { > - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); > + TCGv target_pc = dest_gpr(ctx, a->rd); > + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); > + gen_set_gpr(ctx, a->rd, target_pc); > return true; > } This is not how I expect a function called "pc plus diff" to work. It should be simpler: TCGv rd = dest_gpr(ctx, a->rd); gen_pc_plus_diff(ctx, rd, a->imm); gen_set_gpr(ctx, a->rd, rd); All of the manipulation of cpu_pc, pc_save, and pc_next are all hidden inside the function. All that "add upper immediate to pc" should do is supply the immediate. r~
On 4/4/23 00:07, LIU Zhiwei wrote: > Yes, I think so. I just suspect whether it is easy to read and verify the correctness. And > the maintenance for the future. > > > 1) Maybe we should split the PCREL to a split patch set, as it is a new feature. The point > masking can still use this thread. Yes. > 2) For the new patch set for PCREL, process where we need to modify one by one. One clue > for recognize where to modify is the ctx pc related fields, such as > pc_next/pc_first/succ_insn_pc. > > One thing may worth to try is that don't change the code in insn_trans/trans_X. Just > rename the origin API we need to modify to a new name with _abs suffix. And and a > correspond set of API for PCREL with _pcrel suffix. > > For example, in DisasContext, we define > > void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm); > > In disas_init_fn, > > if (tb_cflags(tb) & CF_PCREL) { > gen_set_gpri = gen_set_gpri_pcrel; > } else { > gen_set_gpri = gen_set_gpri_abs; > } > > Thus we can write the code in trans_insn without think about the PCREL. No. Please have a look at how the other conversions have progressed. E.g. https://lore.kernel.org/qemu-devel/20220930220312.135327-1-richard.henderson@linaro.org/ Step by step, each of the internal translation functions is converted from absolute to relative values. By operating on relative values, all knowledge of "pc" is centralized, and it simplifies the trans_* functions. r~
On 2023/4/4 21:56, Richard Henderson wrote: > On 4/3/23 19:06, Weiwei Li wrote: >> static bool trans_auipc(DisasContext *ctx, arg_auipc *a) >> { >> - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); >> + TCGv target_pc = dest_gpr(ctx, a->rd); >> + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); >> + gen_set_gpr(ctx, a->rd, target_pc); >> return true; >> } > > This is not how I expect a function called "pc plus diff" to work. Yeah, it's different from the similar function in ARM. However, it's more in line with the original RISC-V logic. Maybe we can change a name for the function, such as gen_pc_relative_address(). > It should be simpler: > > > TCGv rd = dest_gpr(ctx, a->rd); > > gen_pc_plus_diff(ctx, rd, a->imm); > gen_set_gpr(ctx, a->rd, rd); > > All of the manipulation of cpu_pc, pc_save, and pc_next are all hidden > inside the function. All that "add upper immediate to pc" should do > is supply the immediate. If we want to hide all of them in gen_pc_plus_diff, then we need calculate the diff for pc_succ_insn or introduce a new API for it, since we need get the successor pc in many instructions. And the logic for gen_goto_tb or gen_set_pc_imm also need update. Regards, Weiwei Li > > > r~
On 4/4/23 07:33, liweiwei wrote: > If we want to hide all of them in gen_pc_plus_diff, then we need calculate the diff for > pc_succ_insn or introduce a new API for it, since we need get the successor pc in many > instructions. > > And the logic for gen_goto_tb or gen_set_pc_imm also need update. Yes, exactly. r~
On 2023/4/4 22:57, Richard Henderson wrote: > On 4/4/23 07:33, liweiwei wrote: >> If we want to hide all of them in gen_pc_plus_diff, then we need >> calculate the diff for pc_succ_insn or introduce a new API for it, >> since we need get the successor pc in many instructions. >> >> And the logic for gen_goto_tb or gen_set_pc_imm also need update. > > Yes, exactly. > > Sorry, I didn't find benefits from this. If we do this, we'll firstly calculate the diff = pc_succ_insn - pc_next, then we add it with pc_next - pc_save to get the relative address to env->pc. This seems make things more complicated. Regards, Weiwei Li > r~
On 4/4/23 08:14, liweiwei wrote: > > On 2023/4/4 22:57, Richard Henderson wrote: >> On 4/4/23 07:33, liweiwei wrote: >>> If we want to hide all of them in gen_pc_plus_diff, then we need calculate the diff >>> for pc_succ_insn or introduce a new API for it, since we need get the successor pc in >>> many instructions. >>> >>> And the logic for gen_goto_tb or gen_set_pc_imm also need update. >> >> Yes, exactly. >> >> > Sorry, I didn't find benefits from this. If we do this, we'll firstly calculate the diff = > pc_succ_insn - pc_next, then we add it with pc_next - pc_save to get the relative address > to env->pc. It will me simpler because you'll move all of the calculations into a helper function. The trans_* functions will be supplying a immediate directly: * for auipc, this is a->imm, * for jalr, this is 0. r~
On 2023/4/4 23:27, Richard Henderson wrote: > On 4/4/23 08:14, liweiwei wrote: >> >> On 2023/4/4 22:57, Richard Henderson wrote: >>> On 4/4/23 07:33, liweiwei wrote: >>>> If we want to hide all of them in gen_pc_plus_diff, then we need >>>> calculate the diff for pc_succ_insn or introduce a new API for it, >>>> since we need get the successor pc in many instructions. >>>> >>>> And the logic for gen_goto_tb or gen_set_pc_imm also need update. >>> >>> Yes, exactly. >>> >>> >> Sorry, I didn't find benefits from this. If we do this, we'll firstly >> calculate the diff = pc_succ_insn - pc_next, then we add it with >> pc_next - pc_save to get the relative address to env->pc. > > It will me simpler because you'll move all of the calculations into a > helper function. helper? Do you mean gen_pc_plus_diff? > > The trans_* functions will be supplying a immediate directly: > > * for auipc, this is a->imm, Yeah. this will be simpler in trans_, however the total calculation is the same. we just move a->imm + pc_next to gen_pc_plus_diff. > * for jalr, this is 0. Not 0, but pc_succ_insn - pc_next. This may be the case in many place. Regards, Weiwei Li > > > r~
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 1e97473af2..646fa31a59 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) static void riscv_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - RISCVCPU *cpu = RISCV_CPU(cs); - CPURISCVState *env = &cpu->env; - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); + if (!(tb_cflags(tb) & CF_PCREL)) { + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); - if (xl == MXL_RV32) { - env->pc = (int32_t) tb->pc; - } else { - env->pc = tb->pc; + if (xl == MXL_RV32) { + env->pc = (int32_t) tb->pc; + } else { + env->pc = tb->pc; + } } } @@ -693,11 +695,18 @@ static void riscv_restore_state_to_opc(CPUState *cs, RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); + target_ulong pc; + + if (tb_cflags(tb) & CF_PCREL) { + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; + } else { + pc = data[0]; + } if (xl == MXL_RV32) { - env->pc = (int32_t)data[0]; + env->pc = (int32_t)pc; } else { - env->pc = data[0]; + env->pc = pc; } env->bins = data[1]; } diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc index cc72864d32..7cbbdac5aa 100644 --- a/target/riscv/insn_trans/trans_rvi.c.inc +++ b/target/riscv/insn_trans/trans_rvi.c.inc @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) static bool trans_auipc(DisasContext *ctx, arg_auipc *a) { - gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next); + TCGv target_pc = dest_gpr(ctx, a->rd); + gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next); + gen_set_gpr(ctx, a->rd, target_pc); return true; } @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) { TCGLabel *misaligned = NULL; TCGv target_pc = tcg_temp_new(); + TCGv succ_pc = dest_gpr(ctx, a->rd); tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), a->imm); tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2); @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned); } - gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); + gen_set_gpr(ctx, a->rd, succ_pc); + tcg_gen_mov_tl(cpu_pc, target_pc); lookup_and_goto_ptr(ctx); @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN); TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN); target_ulong next_pc; + target_ulong orig_pc_save = ctx->pc_save; if (get_xl(ctx) == MXL_RV128) { TCGv src1h = get_gprh(ctx, a->rs1); @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) gen_set_label(l); /* branch taken */ + ctx->pc_save = orig_pc_save; next_pc = ctx->base.pc_next + a->imm; if (!has_ext(ctx, RVC) && (next_pc & 0x3)) { /* misaligned */ @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond) gen_pc_plus_diff(target_pc, ctx, next_pc); gen_exception_inst_addr_mis(ctx, target_pc); } else { - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm); + gen_goto_tb(ctx, 0, next_pc); } + ctx->pc_save = -1; ctx->base.is_jmp = DISAS_NORETURN; return true; diff --git a/target/riscv/translate.c b/target/riscv/translate.c index d434fedb37..4623749602 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -59,6 +59,7 @@ typedef struct DisasContext { DisasContextBase base; /* pc_succ_insn points to the instruction following base.pc_next */ target_ulong pc_succ_insn; + target_ulong pc_save; target_ulong priv_ver; RISCVMXL misa_mxl_max; RISCVMXL xl; @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx) static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, target_ulong dest) { - if (get_xl(ctx) == MXL_RV32) { - dest = (int32_t)dest; + assert(ctx->pc_save != -1); + if (tb_cflags(ctx->base.tb) & CF_PCREL) { + tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save); + if (get_xl(ctx) == MXL_RV32) { + tcg_gen_ext32s_tl(target, target); + } + } else { + if (get_xl(ctx) == MXL_RV32) { + dest = (int32_t)dest; + } + tcg_gen_movi_tl(target, dest); } - tcg_gen_movi_tl(target, dest); } static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest) { gen_pc_plus_diff(cpu_pc, ctx, dest); + ctx->pc_save = dest; } static void generate_exception(DisasContext *ctx, int excp) @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) * direct block chain benefits will be small. */ if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { - tcg_gen_goto_tb(n); - gen_set_pc_imm(ctx, dest); + /* + * For pcrel, the pc must always be up-to-date on entry to + * the linked TB, so that it can use simple additions for all + * further adjustments. For !pcrel, the linked TB is compiled + * to know its full virtual address, so we can delay the + * update to pc to the unlinked path. A long chain of links + * can thus avoid many updates to the PC. + */ + if (tb_cflags(ctx->base.tb) & CF_PCREL) { + gen_set_pc_imm(ctx, dest); + tcg_gen_goto_tb(n); + } else { + tcg_gen_goto_tb(n); + gen_set_pc_imm(ctx, dest); + } tcg_gen_exit_tb(ctx->base.tb, n); } else { gen_set_pc_imm(ctx, dest); @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t) static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) { target_ulong next_pc; + TCGv succ_pc = dest_gpr(ctx, rd); /* check misaligned: */ next_pc = ctx->base.pc_next + imm; @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) } } - gen_set_gpri(ctx, rd, ctx->pc_succ_insn); - gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this for safety */ + gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn); + gen_set_gpr(ctx, rd, succ_pc); + + gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */ ctx->base.is_jmp = DISAS_NORETURN; } @@ -1150,6 +1176,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) RISCVCPU *cpu = RISCV_CPU(cs); uint32_t tb_flags = ctx->base.tb->flags; + ctx->pc_save = ctx->base.pc_first; ctx->pc_succ_insn = ctx->base.pc_first; ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX); ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + target_ulong pc_next = ctx->base.pc_next; + + if (tb_cflags(dcbase->tb) & CF_PCREL) { + pc_next &= ~TARGET_PAGE_MASK; + } - tcg_gen_insn_start(ctx->base.pc_next, 0); + tcg_gen_insn_start(pc_next, 0); ctx->insn_start = tcg_last_op(); }