diff mbox series

[v6,4/6] target/riscv: Add support for PC-relative translation

Message ID 20230404020653.18911-5-liweiwei@iscas.ac.cn (mailing list archive)
State New, archived
Headers show
Series target/riscv: Fix pointer mask related support | expand

Commit Message

Weiwei Li April 4, 2023, 2:06 a.m. UTC
Add a base pc_save for PC-relative translation(CF_PCREL).
Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
We can get pc-relative address from following formula:
  real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save.
Use gen_get_target_pc to compute target address of auipc and successor
address of jalr and jal.

The existence of CF_PCREL can improve performance with the guest
kernel's address space randomization.  Each guest process maps libc.so
(et al) at a different virtual address, and this allows those
translations to be shared.

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/cpu.c                      | 29 +++++++++------
 target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
 target/riscv/translate.c                | 48 ++++++++++++++++++++-----
 3 files changed, 70 insertions(+), 21 deletions(-)

Comments

LIU Zhiwei April 4, 2023, 3:12 a.m. UTC | #1
On 2023/4/4 10:06, Weiwei Li wrote:
> Add a base pc_save for PC-relative translation(CF_PCREL).
> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
> We can get pc-relative address from following formula:
>    real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save.
> Use gen_get_target_pc to compute target address of auipc and successor
> address of jalr and jal.
>
> The existence of CF_PCREL can improve performance with the guest
> kernel's address space randomization.  Each guest process maps libc.so
> (et al) at a different virtual address, and this allows those
> translations to be shared.
>
> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/riscv/cpu.c                      | 29 +++++++++------
>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--

Miss the process for trans_ebreak.

I want to construct the PCREL feature on the processing of ctx pc 
related fields, which is the reason why we need do specially process. 
For example,

  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
  {
-    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
+    if (tb_cflags(ctx->cflags) & CF_PCREL) {
+        target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first + 
a->imm;
+        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
+    } else {
+        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
+    }
      return true;
  }

+static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, 
target_ulong rel)
+{
+    TCGv dest = dest_gpr(ctx, reg_num);
+    tcg_gen_addi_tl(dest, t, rel);
+    gen_set_gpr(ctx, reg_num, dest);
+}
+

But if it is too difficult to reuse the current implementation, your 
implementation is also acceptable to me.

Zhiwei

>   target/riscv/translate.c                | 48 ++++++++++++++++++++-----
>   3 files changed, 70 insertions(+), 21 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 1e97473af2..646fa31a59 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>                                             const TranslationBlock *tb)
>   {
> -    RISCVCPU *cpu = RISCV_CPU(cs);
> -    CPURISCVState *env = &cpu->env;
> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
> +    if (!(tb_cflags(tb) & CF_PCREL)) {
> +        RISCVCPU *cpu = RISCV_CPU(cs);
> +        CPURISCVState *env = &cpu->env;
> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>   
> -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>   
> -    if (xl == MXL_RV32) {
> -        env->pc = (int32_t) tb->pc;
> -    } else {
> -        env->pc = tb->pc;
> +        if (xl == MXL_RV32) {
> +            env->pc = (int32_t) tb->pc;
> +        } else {
> +            env->pc = tb->pc;
> +        }
>       }
>   }
>   
> @@ -693,11 +695,18 @@ static void riscv_restore_state_to_opc(CPUState *cs,
>       RISCVCPU *cpu = RISCV_CPU(cs);
>       CPURISCVState *env = &cpu->env;
>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
> +    target_ulong pc;
> +
> +    if (tb_cflags(tb) & CF_PCREL) {
> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
> +    } else {
> +        pc = data[0];
> +    }
>   
>       if (xl == MXL_RV32) {
> -        env->pc = (int32_t)data[0];
> +        env->pc = (int32_t)pc;
>       } else {
> -        env->pc = data[0];
> +        env->pc = pc;
>       }
>       env->bins = data[1];
>   }
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
> index cc72864d32..7cbbdac5aa 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
>   
>   static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>   {
> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    TCGv target_pc = dest_gpr(ctx, a->rd);
> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
> +    gen_set_gpr(ctx, a->rd, target_pc);
>       return true;
>   }
>   
> @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
>   {
>       TCGLabel *misaligned = NULL;
>       TCGv target_pc = tcg_temp_new();
> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>   
>       tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), a->imm);
>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>       }
>   
> -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
> +    gen_set_gpr(ctx, a->rd, succ_pc);
> +
>       tcg_gen_mov_tl(cpu_pc, target_pc);
>       lookup_and_goto_ptr(ctx);
>   
> @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>       target_ulong next_pc;
> +    target_ulong orig_pc_save = ctx->pc_save;
>   
>       if (get_xl(ctx) == MXL_RV128) {
>           TCGv src1h = get_gprh(ctx, a->rs1);
> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
>   
>       gen_set_label(l); /* branch taken */
>   
> +    ctx->pc_save = orig_pc_save;
>       next_pc = ctx->base.pc_next + a->imm;
>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>           /* misaligned */
> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>           gen_exception_inst_addr_mis(ctx, target_pc);
>       } else {
> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
> +        gen_goto_tb(ctx, 0, next_pc);
>       }
> +    ctx->pc_save = -1;
>       ctx->base.is_jmp = DISAS_NORETURN;
>   
>       return true;
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index d434fedb37..4623749602 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>       DisasContextBase base;
>       /* pc_succ_insn points to the instruction following base.pc_next */
>       target_ulong pc_succ_insn;
> +    target_ulong pc_save;
>       target_ulong priv_ver;
>       RISCVMXL misa_mxl_max;
>       RISCVMXL xl;
> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>                                 target_ulong dest)
>   {
> -    if (get_xl(ctx) == MXL_RV32) {
> -        dest = (int32_t)dest;
> +    assert(ctx->pc_save != -1);
> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
> +        if (get_xl(ctx) == MXL_RV32) {
> +            tcg_gen_ext32s_tl(target, target);
> +        }
> +    } else {
> +        if (get_xl(ctx) == MXL_RV32) {
> +            dest = (int32_t)dest;
> +        }
> +        tcg_gen_movi_tl(target, dest);
>       }
> -    tcg_gen_movi_tl(target, dest);
>   }
>   
>   static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>   {
>       gen_pc_plus_diff(cpu_pc, ctx, dest);
> +    ctx->pc_save = dest;
>   }
>   
>   static void generate_exception(DisasContext *ctx, int excp)
> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>         * direct block chain benefits will be small.
>         */
>       if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) {
> -        tcg_gen_goto_tb(n);
> -        gen_set_pc_imm(ctx, dest);
> +        /*
> +         * For pcrel, the pc must always be up-to-date on entry to
> +         * the linked TB, so that it can use simple additions for all
> +         * further adjustments.  For !pcrel, the linked TB is compiled
> +         * to know its full virtual address, so we can delay the
> +         * update to pc to the unlinked path.  A long chain of links
> +         * can thus avoid many updates to the PC.
> +         */
> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
> +            gen_set_pc_imm(ctx, dest);
> +            tcg_gen_goto_tb(n);
> +        } else {
> +            tcg_gen_goto_tb(n);
> +            gen_set_pc_imm(ctx, dest);
> +        }
>           tcg_gen_exit_tb(ctx->base.tb, n);
>       } else {
>           gen_set_pc_imm(ctx, dest);
> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>   {
>       target_ulong next_pc;
> +    TCGv succ_pc = dest_gpr(ctx, rd);
>   
>       /* check misaligned: */
>       next_pc = ctx->base.pc_next + imm;
> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>           }
>       }
>   
> -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this for safety */
> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
> +    gen_set_gpr(ctx, rd, succ_pc);
> +
> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>       ctx->base.is_jmp = DISAS_NORETURN;
>   }
>   
> @@ -1150,6 +1176,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>       RISCVCPU *cpu = RISCV_CPU(cs);
>       uint32_t tb_flags = ctx->base.tb->flags;
>   
> +    ctx->pc_save = ctx->base.pc_first;
>       ctx->pc_succ_insn = ctx->base.pc_first;
>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
> @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
>   static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
>   {
>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
> +    target_ulong pc_next = ctx->base.pc_next;
> +
> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
> +        pc_next &= ~TARGET_PAGE_MASK;
> +    }
>   
> -    tcg_gen_insn_start(ctx->base.pc_next, 0);
> +    tcg_gen_insn_start(pc_next, 0);
>       ctx->insn_start = tcg_last_op();
>   }
>
LIU Zhiwei April 4, 2023, 3:25 a.m. UTC | #2
On 2023/4/4 11:12, LIU Zhiwei wrote:
>
> On 2023/4/4 10:06, Weiwei Li wrote:
>> Add a base pc_save for PC-relative translation(CF_PCREL).
>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
>> We can get pc-relative address from following formula:
>>    real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save.
>> Use gen_get_target_pc to compute target address of auipc and successor
>> address of jalr and jal.
>>
>> The existence of CF_PCREL can improve performance with the guest
>> kernel's address space randomization.  Each guest process maps libc.so
>> (et al) at a different virtual address, and this allows those
>> translations to be shared.
>>
>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/riscv/cpu.c                      | 29 +++++++++------
>>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
>
> Miss the process for trans_ebreak.

Please ignore this sentence comment. It will not influence the codegen.

Zhiwei

>
> I want to construct the PCREL feature on the processing of ctx pc 
> related fields, which is the reason why we need do specially process. 
> For example,
>
>  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>  {
> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    if (tb_cflags(ctx->cflags) & CF_PCREL) {
> +        target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first 
> + a->imm;
> +        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
> +    } else {
> +        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    }
>      return true;
>  }
>
> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, 
> target_ulong rel)
> +{
> +    TCGv dest = dest_gpr(ctx, reg_num);
> +    tcg_gen_addi_tl(dest, t, rel);
> +    gen_set_gpr(ctx, reg_num, dest);
> +}
> +
>
> But if it is too difficult to reuse the current implementation, your 
> implementation is also acceptable to me.
>
> Zhiwei
>
>>   target/riscv/translate.c | 48 ++++++++++++++++++++-----
>>   3 files changed, 70 insertions(+), 21 deletions(-)
>>
>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>> index 1e97473af2..646fa31a59 100644
>> --- a/target/riscv/cpu.c
>> +++ b/target/riscv/cpu.c
>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>>                                             const TranslationBlock *tb)
>>   {
>> -    RISCVCPU *cpu = RISCV_CPU(cs);
>> -    CPURISCVState *env = &cpu->env;
>> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>> +    if (!(tb_cflags(tb) & CF_PCREL)) {
>> +        RISCVCPU *cpu = RISCV_CPU(cs);
>> +        CPURISCVState *env = &cpu->env;
>> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>   -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>   -    if (xl == MXL_RV32) {
>> -        env->pc = (int32_t) tb->pc;
>> -    } else {
>> -        env->pc = tb->pc;
>> +        if (xl == MXL_RV32) {
>> +            env->pc = (int32_t) tb->pc;
>> +        } else {
>> +            env->pc = tb->pc;
>> +        }
>>       }
>>   }
>>   @@ -693,11 +695,18 @@ static void 
>> riscv_restore_state_to_opc(CPUState *cs,
>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>       CPURISCVState *env = &cpu->env;
>>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>> +    target_ulong pc;
>> +
>> +    if (tb_cflags(tb) & CF_PCREL) {
>> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
>> +    } else {
>> +        pc = data[0];
>> +    }
>>         if (xl == MXL_RV32) {
>> -        env->pc = (int32_t)data[0];
>> +        env->pc = (int32_t)pc;
>>       } else {
>> -        env->pc = data[0];
>> +        env->pc = pc;
>>       }
>>       env->bins = data[1];
>>   }
>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
>> b/target/riscv/insn_trans/trans_rvi.c.inc
>> index cc72864d32..7cbbdac5aa 100644
>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>     static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>   {
>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>       return true;
>>   }
>>   @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, 
>> arg_jalr *a)
>>   {
>>       TCGLabel *misaligned = NULL;
>>       TCGv target_pc = tcg_temp_new();
>> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>>         tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), 
>> a->imm);
>>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
>>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>>       }
>>   -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>> +    gen_set_gpr(ctx, a->rd, succ_pc);
>> +
>>       tcg_gen_mov_tl(cpu_pc, target_pc);
>>       lookup_and_goto_ptr(ctx);
>>   @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>>       target_ulong next_pc;
>> +    target_ulong orig_pc_save = ctx->pc_save;
>>         if (get_xl(ctx) == MXL_RV128) {
>>           TCGv src1h = get_gprh(ctx, a->rs1);
>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>         gen_set_label(l); /* branch taken */
>>   +    ctx->pc_save = orig_pc_save;
>>       next_pc = ctx->base.pc_next + a->imm;
>>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>>           /* misaligned */
>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>>           gen_exception_inst_addr_mis(ctx, target_pc);
>>       } else {
>> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
>> +        gen_goto_tb(ctx, 0, next_pc);
>>       }
>> +    ctx->pc_save = -1;
>>       ctx->base.is_jmp = DISAS_NORETURN;
>>         return true;
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index d434fedb37..4623749602 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>>       DisasContextBase base;
>>       /* pc_succ_insn points to the instruction following 
>> base.pc_next */
>>       target_ulong pc_succ_insn;
>> +    target_ulong pc_save;
>>       target_ulong priv_ver;
>>       RISCVMXL misa_mxl_max;
>>       RISCVMXL xl;
>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>>                                 target_ulong dest)
>>   {
>> -    if (get_xl(ctx) == MXL_RV32) {
>> -        dest = (int32_t)dest;
>> +    assert(ctx->pc_save != -1);
>> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
>> +        if (get_xl(ctx) == MXL_RV32) {
>> +            tcg_gen_ext32s_tl(target, target);
>> +        }
>> +    } else {
>> +        if (get_xl(ctx) == MXL_RV32) {
>> +            dest = (int32_t)dest;
>> +        }
>> +        tcg_gen_movi_tl(target, dest);
>>       }
>> -    tcg_gen_movi_tl(target, dest);
>>   }
>>     static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>>   {
>>       gen_pc_plus_diff(cpu_pc, ctx, dest);
>> +    ctx->pc_save = dest;
>>   }
>>     static void generate_exception(DisasContext *ctx, int excp)
>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int 
>> n, target_ulong dest)
>>         * direct block chain benefits will be small.
>>         */
>>       if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) {
>> -        tcg_gen_goto_tb(n);
>> -        gen_set_pc_imm(ctx, dest);
>> +        /*
>> +         * For pcrel, the pc must always be up-to-date on entry to
>> +         * the linked TB, so that it can use simple additions for all
>> +         * further adjustments.  For !pcrel, the linked TB is compiled
>> +         * to know its full virtual address, so we can delay the
>> +         * update to pc to the unlinked path.  A long chain of links
>> +         * can thus avoid many updates to the PC.
>> +         */
>> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>> +            gen_set_pc_imm(ctx, dest);
>> +            tcg_gen_goto_tb(n);
>> +        } else {
>> +            tcg_gen_goto_tb(n);
>> +            gen_set_pc_imm(ctx, dest);
>> +        }
>>           tcg_gen_exit_tb(ctx->base.tb, n);
>>       } else {
>>           gen_set_pc_imm(ctx, dest);
>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int 
>> reg_num, TCGv_i64 t)
>>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>>   {
>>       target_ulong next_pc;
>> +    TCGv succ_pc = dest_gpr(ctx, rd);
>>         /* check misaligned: */
>>       next_pc = ctx->base.pc_next + imm;
>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, 
>> target_ulong imm)
>>           }
>>       }
>>   -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
>> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this 
>> for safety */
>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>> +    gen_set_gpr(ctx, rd, succ_pc);
>> +
>> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>>       ctx->base.is_jmp = DISAS_NORETURN;
>>   }
>>   @@ -1150,6 +1176,7 @@ static void 
>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>       uint32_t tb_flags = ctx->base.tb->flags;
>>   +    ctx->pc_save = ctx->base.pc_first;
>>       ctx->pc_succ_insn = ctx->base.pc_first;
>>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
>> @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase 
>> *db, CPUState *cpu)
>>   static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState 
>> *cpu)
>>   {
>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>> +    target_ulong pc_next = ctx->base.pc_next;
>> +
>> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
>> +        pc_next &= ~TARGET_PAGE_MASK;
>> +    }
>>   -    tcg_gen_insn_start(ctx->base.pc_next, 0);
>> +    tcg_gen_insn_start(pc_next, 0);
>>       ctx->insn_start = tcg_last_op();
>>   }
Weiwei Li April 4, 2023, 3:46 a.m. UTC | #3
On 2023/4/4 11:12, LIU Zhiwei wrote:
>
> On 2023/4/4 10:06, Weiwei Li wrote:
>> Add a base pc_save for PC-relative translation(CF_PCREL).
>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
>> We can get pc-relative address from following formula:
>>    real_pc = (old)env->pc + diff, where diff = target_pc - ctx->pc_save.
>> Use gen_get_target_pc to compute target address of auipc and successor
>> address of jalr and jal.
>>
>> The existence of CF_PCREL can improve performance with the guest
>> kernel's address space randomization.  Each guest process maps libc.so
>> (et al) at a different virtual address, and this allows those
>> translations to be shared.
>>
>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/riscv/cpu.c                      | 29 +++++++++------
>>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
>
> Miss the process for trans_ebreak.
>
> I want to construct the PCREL feature on the processing of ctx pc 
> related fields, which is the reason why we need do specially process. 
> For example,
>
>  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>  {
> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    if (tb_cflags(ctx->cflags) & CF_PCREL) {
> +        target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first 
> + a->imm;
> +        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
> +    } else {
> +        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    }
>      return true;
>  }
>
> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv t, 
> target_ulong rel)
> +{
> +    TCGv dest = dest_gpr(ctx, reg_num);
> +    tcg_gen_addi_tl(dest, t, rel);
> +    gen_set_gpr(ctx, reg_num, dest);
> +}
> +
>
> But if it is too difficult to reuse the current implementation, your 
> implementation is also acceptable to me.

Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above job.

Regards,

Weiwei Li

>
> Zhiwei
>
>>   target/riscv/translate.c | 48 ++++++++++++++++++++-----
>>   3 files changed, 70 insertions(+), 21 deletions(-)
>>
>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>> index 1e97473af2..646fa31a59 100644
>> --- a/target/riscv/cpu.c
>> +++ b/target/riscv/cpu.c
>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>>                                             const TranslationBlock *tb)
>>   {
>> -    RISCVCPU *cpu = RISCV_CPU(cs);
>> -    CPURISCVState *env = &cpu->env;
>> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>> +    if (!(tb_cflags(tb) & CF_PCREL)) {
>> +        RISCVCPU *cpu = RISCV_CPU(cs);
>> +        CPURISCVState *env = &cpu->env;
>> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>   -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>   -    if (xl == MXL_RV32) {
>> -        env->pc = (int32_t) tb->pc;
>> -    } else {
>> -        env->pc = tb->pc;
>> +        if (xl == MXL_RV32) {
>> +            env->pc = (int32_t) tb->pc;
>> +        } else {
>> +            env->pc = tb->pc;
>> +        }
>>       }
>>   }
>>   @@ -693,11 +695,18 @@ static void 
>> riscv_restore_state_to_opc(CPUState *cs,
>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>       CPURISCVState *env = &cpu->env;
>>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>> +    target_ulong pc;
>> +
>> +    if (tb_cflags(tb) & CF_PCREL) {
>> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
>> +    } else {
>> +        pc = data[0];
>> +    }
>>         if (xl == MXL_RV32) {
>> -        env->pc = (int32_t)data[0];
>> +        env->pc = (int32_t)pc;
>>       } else {
>> -        env->pc = data[0];
>> +        env->pc = pc;
>>       }
>>       env->bins = data[1];
>>   }
>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
>> b/target/riscv/insn_trans/trans_rvi.c.inc
>> index cc72864d32..7cbbdac5aa 100644
>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>     static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>   {
>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>       return true;
>>   }
>>   @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, 
>> arg_jalr *a)
>>   {
>>       TCGLabel *misaligned = NULL;
>>       TCGv target_pc = tcg_temp_new();
>> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>>         tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), 
>> a->imm);
>>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
>>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>>       }
>>   -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>> +    gen_set_gpr(ctx, a->rd, succ_pc);
>> +
>>       tcg_gen_mov_tl(cpu_pc, target_pc);
>>       lookup_and_goto_ptr(ctx);
>>   @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>>       target_ulong next_pc;
>> +    target_ulong orig_pc_save = ctx->pc_save;
>>         if (get_xl(ctx) == MXL_RV128) {
>>           TCGv src1h = get_gprh(ctx, a->rs1);
>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>         gen_set_label(l); /* branch taken */
>>   +    ctx->pc_save = orig_pc_save;
>>       next_pc = ctx->base.pc_next + a->imm;
>>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>>           /* misaligned */
>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>> *a, TCGCond cond)
>>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>>           gen_exception_inst_addr_mis(ctx, target_pc);
>>       } else {
>> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
>> +        gen_goto_tb(ctx, 0, next_pc);
>>       }
>> +    ctx->pc_save = -1;
>>       ctx->base.is_jmp = DISAS_NORETURN;
>>         return true;
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index d434fedb37..4623749602 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>>       DisasContextBase base;
>>       /* pc_succ_insn points to the instruction following 
>> base.pc_next */
>>       target_ulong pc_succ_insn;
>> +    target_ulong pc_save;
>>       target_ulong priv_ver;
>>       RISCVMXL misa_mxl_max;
>>       RISCVMXL xl;
>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>>                                 target_ulong dest)
>>   {
>> -    if (get_xl(ctx) == MXL_RV32) {
>> -        dest = (int32_t)dest;
>> +    assert(ctx->pc_save != -1);
>> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
>> +        if (get_xl(ctx) == MXL_RV32) {
>> +            tcg_gen_ext32s_tl(target, target);
>> +        }
>> +    } else {
>> +        if (get_xl(ctx) == MXL_RV32) {
>> +            dest = (int32_t)dest;
>> +        }
>> +        tcg_gen_movi_tl(target, dest);
>>       }
>> -    tcg_gen_movi_tl(target, dest);
>>   }
>>     static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>>   {
>>       gen_pc_plus_diff(cpu_pc, ctx, dest);
>> +    ctx->pc_save = dest;
>>   }
>>     static void generate_exception(DisasContext *ctx, int excp)
>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int 
>> n, target_ulong dest)
>>         * direct block chain benefits will be small.
>>         */
>>       if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) {
>> -        tcg_gen_goto_tb(n);
>> -        gen_set_pc_imm(ctx, dest);
>> +        /*
>> +         * For pcrel, the pc must always be up-to-date on entry to
>> +         * the linked TB, so that it can use simple additions for all
>> +         * further adjustments.  For !pcrel, the linked TB is compiled
>> +         * to know its full virtual address, so we can delay the
>> +         * update to pc to the unlinked path.  A long chain of links
>> +         * can thus avoid many updates to the PC.
>> +         */
>> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>> +            gen_set_pc_imm(ctx, dest);
>> +            tcg_gen_goto_tb(n);
>> +        } else {
>> +            tcg_gen_goto_tb(n);
>> +            gen_set_pc_imm(ctx, dest);
>> +        }
>>           tcg_gen_exit_tb(ctx->base.tb, n);
>>       } else {
>>           gen_set_pc_imm(ctx, dest);
>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int 
>> reg_num, TCGv_i64 t)
>>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>>   {
>>       target_ulong next_pc;
>> +    TCGv succ_pc = dest_gpr(ctx, rd);
>>         /* check misaligned: */
>>       next_pc = ctx->base.pc_next + imm;
>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, 
>> target_ulong imm)
>>           }
>>       }
>>   -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
>> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this 
>> for safety */
>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>> +    gen_set_gpr(ctx, rd, succ_pc);
>> +
>> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>>       ctx->base.is_jmp = DISAS_NORETURN;
>>   }
>>   @@ -1150,6 +1176,7 @@ static void 
>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>       uint32_t tb_flags = ctx->base.tb->flags;
>>   +    ctx->pc_save = ctx->base.pc_first;
>>       ctx->pc_succ_insn = ctx->base.pc_first;
>>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
>> @@ -1195,8 +1222,13 @@ static void riscv_tr_tb_start(DisasContextBase 
>> *db, CPUState *cpu)
>>   static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState 
>> *cpu)
>>   {
>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>> +    target_ulong pc_next = ctx->base.pc_next;
>> +
>> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
>> +        pc_next &= ~TARGET_PAGE_MASK;
>> +    }
>>   -    tcg_gen_insn_start(ctx->base.pc_next, 0);
>> +    tcg_gen_insn_start(pc_next, 0);
>>       ctx->insn_start = tcg_last_op();
>>   }
LIU Zhiwei April 4, 2023, 7:07 a.m. UTC | #4
On 2023/4/4 11:46, liweiwei wrote:
>
> On 2023/4/4 11:12, LIU Zhiwei wrote:
>>
>> On 2023/4/4 10:06, Weiwei Li wrote:
>>> Add a base pc_save for PC-relative translation(CF_PCREL).
>>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
>>> We can get pc-relative address from following formula:
>>>    real_pc = (old)env->pc + diff, where diff = target_pc - 
>>> ctx->pc_save.
>>> Use gen_get_target_pc to compute target address of auipc and successor
>>> address of jalr and jal.
>>>
>>> The existence of CF_PCREL can improve performance with the guest
>>> kernel's address space randomization.  Each guest process maps libc.so
>>> (et al) at a different virtual address, and this allows those
>>> translations to be shared.
>>>
>>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>   target/riscv/cpu.c                      | 29 +++++++++------
>>>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
>>
>> Miss the process for trans_ebreak.
>>
>> I want to construct the PCREL feature on the processing of ctx pc 
>> related fields, which is the reason why we need do specially process. 
>> For example,
>>
>>  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>  {
>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>> +    if (tb_cflags(ctx->cflags) & CF_PCREL) {
>> +        target_ulong pc_rel = ctx->base.pc_next - ctx->base.pc_first 
>> + a->imm;
>> +        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
>> +    } else {
>> +        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>> +    }
>>      return true;
>>  }
>>
>> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv 
>> t, target_ulong rel)
>> +{
>> +    TCGv dest = dest_gpr(ctx, reg_num);
>> +    tcg_gen_addi_tl(dest, t, rel);
>> +    gen_set_gpr(ctx, reg_num, dest);
>> +}
>> +
>>
>> But if it is too difficult to reuse the current implementation, your 
>> implementation is also acceptable to me.
>
> Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above 
> job.

Yes, I think so. I just suspect whether it is easy to read and verify 
the correctness. And the maintenance for the future.


1) Maybe we should split the PCREL to a split patch set, as it is a new 
feature. The point masking can still use this thread.


2) For the new patch set for PCREL, process where we need to modify one 
by one. One clue for recognize where to modify is the ctx pc related 
fields, such as pc_next/pc_first/succ_insn_pc.

One thing may worth to try is that don't change the code in 
insn_trans/trans_X.  Just rename the origin API we need to modify to a 
new name with _abs suffix. And and a correspond set of API for PCREL 
with _pcrel suffix.

For example, in DisasContext, we define

void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm);

In disas_init_fn,

if (tb_cflags(tb) & CF_PCREL) {
	gen_set_gpri = gen_set_gpri_pcrel;
} else {
	gen_set_gpri = gen_set_gpri_abs;
}

Thus we can write the code in trans_insn without think about the PCREL.

Thanks,
Zhiwei

>
> Regards,
>
> Weiwei Li
>
>>
>> Zhiwei
>>
>>>   target/riscv/translate.c | 48 ++++++++++++++++++++-----
>>>   3 files changed, 70 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>>> index 1e97473af2..646fa31a59 100644
>>> --- a/target/riscv/cpu.c
>>> +++ b/target/riscv/cpu.c
>>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>>>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>>>                                             const TranslationBlock *tb)
>>>   {
>>> -    RISCVCPU *cpu = RISCV_CPU(cs);
>>> -    CPURISCVState *env = &cpu->env;
>>> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>> +    if (!(tb_cflags(tb) & CF_PCREL)) {
>>> +        RISCVCPU *cpu = RISCV_CPU(cs);
>>> +        CPURISCVState *env = &cpu->env;
>>> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>   -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>>   -    if (xl == MXL_RV32) {
>>> -        env->pc = (int32_t) tb->pc;
>>> -    } else {
>>> -        env->pc = tb->pc;
>>> +        if (xl == MXL_RV32) {
>>> +            env->pc = (int32_t) tb->pc;
>>> +        } else {
>>> +            env->pc = tb->pc;
>>> +        }
>>>       }
>>>   }
>>>   @@ -693,11 +695,18 @@ static void 
>>> riscv_restore_state_to_opc(CPUState *cs,
>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>       CPURISCVState *env = &cpu->env;
>>>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>> +    target_ulong pc;
>>> +
>>> +    if (tb_cflags(tb) & CF_PCREL) {
>>> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
>>> +    } else {
>>> +        pc = data[0];
>>> +    }
>>>         if (xl == MXL_RV32) {
>>> -        env->pc = (int32_t)data[0];
>>> +        env->pc = (int32_t)pc;
>>>       } else {
>>> -        env->pc = data[0];
>>> +        env->pc = pc;
>>>       }
>>>       env->bins = data[1];
>>>   }
>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
>>> b/target/riscv/insn_trans/trans_rvi.c.inc
>>> index cc72864d32..7cbbdac5aa 100644
>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>>     static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>>   {
>>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>>       return true;
>>>   }
>>>   @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, 
>>> arg_jalr *a)
>>>   {
>>>       TCGLabel *misaligned = NULL;
>>>       TCGv target_pc = tcg_temp_new();
>>> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>>>         tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), 
>>> a->imm);
>>>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
>>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr 
>>> *a)
>>>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>>>       }
>>>   -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>> +    gen_set_gpr(ctx, a->rd, succ_pc);
>>> +
>>>       tcg_gen_mov_tl(cpu_pc, target_pc);
>>>       lookup_and_goto_ptr(ctx);
>>>   @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, 
>>> arg_b *a, TCGCond cond)
>>>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>>>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>>>       target_ulong next_pc;
>>> +    target_ulong orig_pc_save = ctx->pc_save;
>>>         if (get_xl(ctx) == MXL_RV128) {
>>>           TCGv src1h = get_gprh(ctx, a->rs1);
>>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>>> *a, TCGCond cond)
>>>         gen_set_label(l); /* branch taken */
>>>   +    ctx->pc_save = orig_pc_save;
>>>       next_pc = ctx->base.pc_next + a->imm;
>>>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>>>           /* misaligned */
>>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>>> *a, TCGCond cond)
>>>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>>>           gen_exception_inst_addr_mis(ctx, target_pc);
>>>       } else {
>>> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
>>> +        gen_goto_tb(ctx, 0, next_pc);
>>>       }
>>> +    ctx->pc_save = -1;
>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>         return true;
>>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>>> index d434fedb37..4623749602 100644
>>> --- a/target/riscv/translate.c
>>> +++ b/target/riscv/translate.c
>>> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>>>       DisasContextBase base;
>>>       /* pc_succ_insn points to the instruction following 
>>> base.pc_next */
>>>       target_ulong pc_succ_insn;
>>> +    target_ulong pc_save;
>>>       target_ulong priv_ver;
>>>       RISCVMXL misa_mxl_max;
>>>       RISCVMXL xl;
>>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>>>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>>>                                 target_ulong dest)
>>>   {
>>> -    if (get_xl(ctx) == MXL_RV32) {
>>> -        dest = (int32_t)dest;
>>> +    assert(ctx->pc_save != -1);
>>> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
>>> +        if (get_xl(ctx) == MXL_RV32) {
>>> +            tcg_gen_ext32s_tl(target, target);
>>> +        }
>>> +    } else {
>>> +        if (get_xl(ctx) == MXL_RV32) {
>>> +            dest = (int32_t)dest;
>>> +        }
>>> +        tcg_gen_movi_tl(target, dest);
>>>       }
>>> -    tcg_gen_movi_tl(target, dest);
>>>   }
>>>     static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>>>   {
>>>       gen_pc_plus_diff(cpu_pc, ctx, dest);
>>> +    ctx->pc_save = dest;
>>>   }
>>>     static void generate_exception(DisasContext *ctx, int excp)
>>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int 
>>> n, target_ulong dest)
>>>         * direct block chain benefits will be small.
>>>         */
>>>       if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) {
>>> -        tcg_gen_goto_tb(n);
>>> -        gen_set_pc_imm(ctx, dest);
>>> +        /*
>>> +         * For pcrel, the pc must always be up-to-date on entry to
>>> +         * the linked TB, so that it can use simple additions for all
>>> +         * further adjustments.  For !pcrel, the linked TB is compiled
>>> +         * to know its full virtual address, so we can delay the
>>> +         * update to pc to the unlinked path.  A long chain of links
>>> +         * can thus avoid many updates to the PC.
>>> +         */
>>> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>> +            gen_set_pc_imm(ctx, dest);
>>> +            tcg_gen_goto_tb(n);
>>> +        } else {
>>> +            tcg_gen_goto_tb(n);
>>> +            gen_set_pc_imm(ctx, dest);
>>> +        }
>>>           tcg_gen_exit_tb(ctx->base.tb, n);
>>>       } else {
>>>           gen_set_pc_imm(ctx, dest);
>>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, int 
>>> reg_num, TCGv_i64 t)
>>>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>>>   {
>>>       target_ulong next_pc;
>>> +    TCGv succ_pc = dest_gpr(ctx, rd);
>>>         /* check misaligned: */
>>>       next_pc = ctx->base.pc_next + imm;
>>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, 
>>> target_ulong imm)
>>>           }
>>>       }
>>>   -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
>>> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this 
>>> for safety */
>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>> +    gen_set_gpr(ctx, rd, succ_pc);
>>> +
>>> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>   }
>>>   @@ -1150,6 +1176,7 @@ static void 
>>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>       uint32_t tb_flags = ctx->base.tb->flags;
>>>   +    ctx->pc_save = ctx->base.pc_first;
>>>       ctx->pc_succ_insn = ctx->base.pc_first;
>>>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>>>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
>>> @@ -1195,8 +1222,13 @@ static void 
>>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
>>>   static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState 
>>> *cpu)
>>>   {
>>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>>> +    target_ulong pc_next = ctx->base.pc_next;
>>> +
>>> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
>>> +        pc_next &= ~TARGET_PAGE_MASK;
>>> +    }
>>>   -    tcg_gen_insn_start(ctx->base.pc_next, 0);
>>> +    tcg_gen_insn_start(pc_next, 0);
>>>       ctx->insn_start = tcg_last_op();
>>>   }
Weiwei Li April 4, 2023, 8:48 a.m. UTC | #5
On 2023/4/4 15:07, LIU Zhiwei wrote:
>
> On 2023/4/4 11:46, liweiwei wrote:
>>
>> On 2023/4/4 11:12, LIU Zhiwei wrote:
>>>
>>> On 2023/4/4 10:06, Weiwei Li wrote:
>>>> Add a base pc_save for PC-relative translation(CF_PCREL).
>>>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
>>>> We can get pc-relative address from following formula:
>>>>    real_pc = (old)env->pc + diff, where diff = target_pc - 
>>>> ctx->pc_save.
>>>> Use gen_get_target_pc to compute target address of auipc and successor
>>>> address of jalr and jal.
>>>>
>>>> The existence of CF_PCREL can improve performance with the guest
>>>> kernel's address space randomization.  Each guest process maps libc.so
>>>> (et al) at a different virtual address, and this allows those
>>>> translations to be shared.
>>>>
>>>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>>>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>>> ---
>>>>   target/riscv/cpu.c                      | 29 +++++++++------
>>>>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
>>>
>>> Miss the process for trans_ebreak.
>>>
>>> I want to construct the PCREL feature on the processing of ctx pc 
>>> related fields, which is the reason why we need do specially 
>>> process. For example,
>>>
>>>  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>>  {
>>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>> +    if (tb_cflags(ctx->cflags) & CF_PCREL) {
>>> +        target_ulong pc_rel = ctx->base.pc_next - 
>>> ctx->base.pc_first + a->imm;
>>> +        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
>>> +    } else {
>>> +        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>> +    }
>>>      return true;
>>>  }
>>>
>>> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv 
>>> t, target_ulong rel)
>>> +{
>>> +    TCGv dest = dest_gpr(ctx, reg_num);
>>> +    tcg_gen_addi_tl(dest, t, rel);
>>> +    gen_set_gpr(ctx, reg_num, dest);
>>> +}
>>> +
>>>
>>> But if it is too difficult to reuse the current implementation, your 
>>> implementation is also acceptable to me.
>>
>> Sorry, I don't get your idea. gen_pc_plus_diff() can do all the above 
>> job.
>
> Yes, I think so. I just suspect whether it is easy to read and verify 
> the correctness. And the maintenance for the future.
>
>
> 1) Maybe we should split the PCREL to a split patch set, as it is a 
> new feature. The point masking can still use this thread.

Point mask for instruction relies on PCREL. That's why I introduce PCREL 
in this patchset.

Maybe we can divide this patch if needed.

>
>
> 2) For the new patch set for PCREL, process where we need to modify 
> one by one. One clue for recognize where to modify is the ctx pc 
> related fields, such as pc_next/pc_first/succ_insn_pc.
>
> One thing may worth to try is that don't change the code in 
> insn_trans/trans_X.  Just rename the origin API we need to modify to a 
> new name with _abs suffix. And and a correspond set of API for PCREL 
> with _pcrel suffix.
>
  I don't find a good way to  remain trans_* unchanged to support PCREL.
> For example, in DisasContext, we define
>
> void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm);
>
> In disas_init_fn,
>
> if (tb_cflags(tb) & CF_PCREL) {
>     gen_set_gpri = gen_set_gpri_pcrel;
> } else {
>     gen_set_gpri = gen_set_gpri_abs;
> } 
> Thus we can write the code in trans_insn without think about the PCREL.

That's what I want. And PCREL also have  been avoided in following code 
of trans_*.

However, we should't update  pc_next/pc_succ_insn related address into 
register directly by reusing existed API like gen_set_gpri.

It's a general API to set gpr to any imm. However PC-relative only works 
for pc-related imm.

Maybe we can introduce a new API like gen_set_gpr_pci() to set pc 
related imm.

However it seems unnecessary, because it's no difference from current 
logic by using gen_pc_plus_diff() from the developer hand.

Regards,

Weiwei Li

>
> Thanks,
> Zhiwei
>
>>
>> Regards,
>>
>> Weiwei Li
>>
>>>
>>> Zhiwei
>>>
>>>>   target/riscv/translate.c | 48 ++++++++++++++++++++-----
>>>>   3 files changed, 70 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>>>> index 1e97473af2..646fa31a59 100644
>>>> --- a/target/riscv/cpu.c
>>>> +++ b/target/riscv/cpu.c
>>>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>>>>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>>>>                                             const TranslationBlock 
>>>> *tb)
>>>>   {
>>>> -    RISCVCPU *cpu = RISCV_CPU(cs);
>>>> -    CPURISCVState *env = &cpu->env;
>>>> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>> +    if (!(tb_cflags(tb) & CF_PCREL)) {
>>>> +        RISCVCPU *cpu = RISCV_CPU(cs);
>>>> +        CPURISCVState *env = &cpu->env;
>>>> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>>   -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>>> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>>>   -    if (xl == MXL_RV32) {
>>>> -        env->pc = (int32_t) tb->pc;
>>>> -    } else {
>>>> -        env->pc = tb->pc;
>>>> +        if (xl == MXL_RV32) {
>>>> +            env->pc = (int32_t) tb->pc;
>>>> +        } else {
>>>> +            env->pc = tb->pc;
>>>> +        }
>>>>       }
>>>>   }
>>>>   @@ -693,11 +695,18 @@ static void 
>>>> riscv_restore_state_to_opc(CPUState *cs,
>>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>>       CPURISCVState *env = &cpu->env;
>>>>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>> +    target_ulong pc;
>>>> +
>>>> +    if (tb_cflags(tb) & CF_PCREL) {
>>>> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
>>>> +    } else {
>>>> +        pc = data[0];
>>>> +    }
>>>>         if (xl == MXL_RV32) {
>>>> -        env->pc = (int32_t)data[0];
>>>> +        env->pc = (int32_t)pc;
>>>>       } else {
>>>> -        env->pc = data[0];
>>>> +        env->pc = pc;
>>>>       }
>>>>       env->bins = data[1];
>>>>   }
>>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
>>>> b/target/riscv/insn_trans/trans_rvi.c.inc
>>>> index cc72864d32..7cbbdac5aa 100644
>>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>>>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>>>     static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>>>   {
>>>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>>>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>>>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>>>       return true;
>>>>   }
>>>>   @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, 
>>>> arg_jalr *a)
>>>>   {
>>>>       TCGLabel *misaligned = NULL;
>>>>       TCGv target_pc = tcg_temp_new();
>>>> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>>>>         tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), 
>>>> a->imm);
>>>>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
>>>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, 
>>>> arg_jalr *a)
>>>>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>>>>       }
>>>>   -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
>>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>>> +    gen_set_gpr(ctx, a->rd, succ_pc);
>>>> +
>>>>       tcg_gen_mov_tl(cpu_pc, target_pc);
>>>>       lookup_and_goto_ptr(ctx);
>>>>   @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, 
>>>> arg_b *a, TCGCond cond)
>>>>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>>>>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>>>>       target_ulong next_pc;
>>>> +    target_ulong orig_pc_save = ctx->pc_save;
>>>>         if (get_xl(ctx) == MXL_RV128) {
>>>>           TCGv src1h = get_gprh(ctx, a->rs1);
>>>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>>>> *a, TCGCond cond)
>>>>         gen_set_label(l); /* branch taken */
>>>>   +    ctx->pc_save = orig_pc_save;
>>>>       next_pc = ctx->base.pc_next + a->imm;
>>>>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>>>>           /* misaligned */
>>>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, arg_b 
>>>> *a, TCGCond cond)
>>>>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>>>>           gen_exception_inst_addr_mis(ctx, target_pc);
>>>>       } else {
>>>> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
>>>> +        gen_goto_tb(ctx, 0, next_pc);
>>>>       }
>>>> +    ctx->pc_save = -1;
>>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>>         return true;
>>>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>>>> index d434fedb37..4623749602 100644
>>>> --- a/target/riscv/translate.c
>>>> +++ b/target/riscv/translate.c
>>>> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>>>>       DisasContextBase base;
>>>>       /* pc_succ_insn points to the instruction following 
>>>> base.pc_next */
>>>>       target_ulong pc_succ_insn;
>>>> +    target_ulong pc_save;
>>>>       target_ulong priv_ver;
>>>>       RISCVMXL misa_mxl_max;
>>>>       RISCVMXL xl;
>>>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>>>>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>>>>                                 target_ulong dest)
>>>>   {
>>>> -    if (get_xl(ctx) == MXL_RV32) {
>>>> -        dest = (int32_t)dest;
>>>> +    assert(ctx->pc_save != -1);
>>>> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>>> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
>>>> +        if (get_xl(ctx) == MXL_RV32) {
>>>> +            tcg_gen_ext32s_tl(target, target);
>>>> +        }
>>>> +    } else {
>>>> +        if (get_xl(ctx) == MXL_RV32) {
>>>> +            dest = (int32_t)dest;
>>>> +        }
>>>> +        tcg_gen_movi_tl(target, dest);
>>>>       }
>>>> -    tcg_gen_movi_tl(target, dest);
>>>>   }
>>>>     static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>>>>   {
>>>>       gen_pc_plus_diff(cpu_pc, ctx, dest);
>>>> +    ctx->pc_save = dest;
>>>>   }
>>>>     static void generate_exception(DisasContext *ctx, int excp)
>>>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, int 
>>>> n, target_ulong dest)
>>>>         * direct block chain benefits will be small.
>>>>         */
>>>>       if (translator_use_goto_tb(&ctx->base, dest) && 
>>>> !ctx->itrigger) {
>>>> -        tcg_gen_goto_tb(n);
>>>> -        gen_set_pc_imm(ctx, dest);
>>>> +        /*
>>>> +         * For pcrel, the pc must always be up-to-date on entry to
>>>> +         * the linked TB, so that it can use simple additions for all
>>>> +         * further adjustments.  For !pcrel, the linked TB is 
>>>> compiled
>>>> +         * to know its full virtual address, so we can delay the
>>>> +         * update to pc to the unlinked path.  A long chain of links
>>>> +         * can thus avoid many updates to the PC.
>>>> +         */
>>>> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>>> +            gen_set_pc_imm(ctx, dest);
>>>> +            tcg_gen_goto_tb(n);
>>>> +        } else {
>>>> +            tcg_gen_goto_tb(n);
>>>> +            gen_set_pc_imm(ctx, dest);
>>>> +        }
>>>>           tcg_gen_exit_tb(ctx->base.tb, n);
>>>>       } else {
>>>>           gen_set_pc_imm(ctx, dest);
>>>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, 
>>>> int reg_num, TCGv_i64 t)
>>>>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>>>>   {
>>>>       target_ulong next_pc;
>>>> +    TCGv succ_pc = dest_gpr(ctx, rd);
>>>>         /* check misaligned: */
>>>>       next_pc = ctx->base.pc_next + imm;
>>>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int rd, 
>>>> target_ulong imm)
>>>>           }
>>>>       }
>>>>   -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
>>>> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this 
>>>> for safety */
>>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>>> +    gen_set_gpr(ctx, rd, succ_pc);
>>>> +
>>>> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>>   }
>>>>   @@ -1150,6 +1176,7 @@ static void 
>>>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>>       uint32_t tb_flags = ctx->base.tb->flags;
>>>>   +    ctx->pc_save = ctx->base.pc_first;
>>>>       ctx->pc_succ_insn = ctx->base.pc_first;
>>>>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>>>>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
>>>> @@ -1195,8 +1222,13 @@ static void 
>>>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
>>>>   static void riscv_tr_insn_start(DisasContextBase *dcbase, 
>>>> CPUState *cpu)
>>>>   {
>>>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>>>> +    target_ulong pc_next = ctx->base.pc_next;
>>>> +
>>>> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
>>>> +        pc_next &= ~TARGET_PAGE_MASK;
>>>> +    }
>>>>   -    tcg_gen_insn_start(ctx->base.pc_next, 0);
>>>> +    tcg_gen_insn_start(pc_next, 0);
>>>>       ctx->insn_start = tcg_last_op();
>>>>   }
LIU Zhiwei April 4, 2023, 9:23 a.m. UTC | #6
On 2023/4/4 16:48, liweiwei wrote:
>
> On 2023/4/4 15:07, LIU Zhiwei wrote:
>>
>> On 2023/4/4 11:46, liweiwei wrote:
>>>
>>> On 2023/4/4 11:12, LIU Zhiwei wrote:
>>>>
>>>> On 2023/4/4 10:06, Weiwei Li wrote:
>>>>> Add a base pc_save for PC-relative translation(CF_PCREL).
>>>>> Diable the directly sync pc from tb by riscv_cpu_synchronize_from_tb.
>>>>> We can get pc-relative address from following formula:
>>>>>    real_pc = (old)env->pc + diff, where diff = target_pc - 
>>>>> ctx->pc_save.
>>>>> Use gen_get_target_pc to compute target address of auipc and 
>>>>> successor
>>>>> address of jalr and jal.
>>>>>
>>>>> The existence of CF_PCREL can improve performance with the guest
>>>>> kernel's address space randomization.  Each guest process maps 
>>>>> libc.so
>>>>> (et al) at a different virtual address, and this allows those
>>>>> translations to be shared.
>>>>>
>>>>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>>>>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>>>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>>>> ---
>>>>>   target/riscv/cpu.c                      | 29 +++++++++------
>>>>>   target/riscv/insn_trans/trans_rvi.c.inc | 14 ++++++--
>>>>
>>>> Miss the process for trans_ebreak.
>>>>
>>>> I want to construct the PCREL feature on the processing of ctx pc 
>>>> related fields, which is the reason why we need do specially 
>>>> process. For example,
>>>>
>>>>  static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>>>  {
>>>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>>> +    if (tb_cflags(ctx->cflags) & CF_PCREL) {
>>>> +        target_ulong pc_rel = ctx->base.pc_next - 
>>>> ctx->base.pc_first + a->imm;
>>>> +        gen_set_gpr_pcrel(ctx, a->rd, cpu_pc, pc_rel);
>>>> +    } else {
>>>> +        gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>>> +    }
>>>>      return true;
>>>>  }
>>>>
>>>> +static void gen_set_gpr_pcrel(DisasContext *ctx, int reg_num, TCGv 
>>>> t, target_ulong rel)
>>>> +{
>>>> +    TCGv dest = dest_gpr(ctx, reg_num);
>>>> +    tcg_gen_addi_tl(dest, t, rel);
>>>> +    gen_set_gpr(ctx, reg_num, dest);
>>>> +}
>>>> +
>>>>
>>>> But if it is too difficult to reuse the current implementation, 
>>>> your implementation is also acceptable to me.
>>>
>>> Sorry, I don't get your idea. gen_pc_plus_diff() can do all the 
>>> above job.
>>
>> Yes, I think so. I just suspect whether it is easy to read and verify 
>> the correctness. And the maintenance for the future.
>>
>>
>> 1) Maybe we should split the PCREL to a split patch set, as it is a 
>> new feature. The point masking can still use this thread.
>
> Point mask for instruction relies on PCREL. That's why I introduce 
> PCREL in this patchset.
>
> Maybe we can divide this patch if needed.
If we won't use another way to rewrite the PCREL, we don't have to split it.
>
>>
>>
>> 2) For the new patch set for PCREL, process where we need to modify 
>> one by one. One clue for recognize where to modify is the ctx pc 
>> related fields, such as pc_next/pc_first/succ_insn_pc.
>>
>> One thing may worth to try is that don't change the code in 
>> insn_trans/trans_X.  Just rename the origin API we need to modify to 
>> a new name with _abs suffix. And and a correspond set of API for 
>> PCREL with _pcrel suffix.
>>
>  I don't find a good way to  remain trans_* unchanged to support PCREL.
>> For example, in DisasContext, we define
>>
>> void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm);
>>
>> In disas_init_fn,
>>
>> if (tb_cflags(tb) & CF_PCREL) {
>>     gen_set_gpri = gen_set_gpri_pcrel;
>> } else {
>>     gen_set_gpri = gen_set_gpri_abs;
>> } Thus we can write the code in trans_insn without think about the 
>> PCREL.
>
> That's what I want. And PCREL also have  been avoided in following 
> code of trans_*.
>
> However, we should't update  pc_next/pc_succ_insn related address into 
> register directly by reusing existed API like gen_set_gpri.
>
> It's a general API to set gpr to any imm. However PC-relative only 
> works for pc-related imm.
>
> Maybe we can introduce a new API like gen_set_gpr_pci() to set pc 
> related imm.

Yes, I think so, except _pci is not a good suffix.  But I don't insist 
on this way.

Zhiwei

>
> However it seems unnecessary, because it's no difference from current 
> logic by using gen_pc_plus_diff() from the developer hand.
>
> Regards,
>
> Weiwei Li
>
>>
>> Thanks,
>> Zhiwei
>>
>>>
>>> Regards,
>>>
>>> Weiwei Li
>>>
>>>>
>>>> Zhiwei
>>>>
>>>>>   target/riscv/translate.c | 48 ++++++++++++++++++++-----
>>>>>   3 files changed, 70 insertions(+), 21 deletions(-)
>>>>>
>>>>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>>>>> index 1e97473af2..646fa31a59 100644
>>>>> --- a/target/riscv/cpu.c
>>>>> +++ b/target/riscv/cpu.c
>>>>> @@ -658,16 +658,18 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
>>>>>   static void riscv_cpu_synchronize_from_tb(CPUState *cs,
>>>>>                                             const TranslationBlock 
>>>>> *tb)
>>>>>   {
>>>>> -    RISCVCPU *cpu = RISCV_CPU(cs);
>>>>> -    CPURISCVState *env = &cpu->env;
>>>>> -    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>>> +    if (!(tb_cflags(tb) & CF_PCREL)) {
>>>>> +        RISCVCPU *cpu = RISCV_CPU(cs);
>>>>> +        CPURISCVState *env = &cpu->env;
>>>>> +        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>>>   -    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>>>> +        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
>>>>>   -    if (xl == MXL_RV32) {
>>>>> -        env->pc = (int32_t) tb->pc;
>>>>> -    } else {
>>>>> -        env->pc = tb->pc;
>>>>> +        if (xl == MXL_RV32) {
>>>>> +            env->pc = (int32_t) tb->pc;
>>>>> +        } else {
>>>>> +            env->pc = tb->pc;
>>>>> +        }
>>>>>       }
>>>>>   }
>>>>>   @@ -693,11 +695,18 @@ static void 
>>>>> riscv_restore_state_to_opc(CPUState *cs,
>>>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>>>       CPURISCVState *env = &cpu->env;
>>>>>       RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
>>>>> +    target_ulong pc;
>>>>> +
>>>>> +    if (tb_cflags(tb) & CF_PCREL) {
>>>>> +        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
>>>>> +    } else {
>>>>> +        pc = data[0];
>>>>> +    }
>>>>>         if (xl == MXL_RV32) {
>>>>> -        env->pc = (int32_t)data[0];
>>>>> +        env->pc = (int32_t)pc;
>>>>>       } else {
>>>>> -        env->pc = data[0];
>>>>> +        env->pc = pc;
>>>>>       }
>>>>>       env->bins = data[1];
>>>>>   }
>>>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
>>>>> b/target/riscv/insn_trans/trans_rvi.c.inc
>>>>> index cc72864d32..7cbbdac5aa 100644
>>>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>>>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>>>>> @@ -38,7 +38,9 @@ static bool trans_lui(DisasContext *ctx, arg_lui 
>>>>> *a)
>>>>>     static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>>>>   {
>>>>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>>>>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>>>>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>>>>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>>>>       return true;
>>>>>   }
>>>>>   @@ -52,6 +54,7 @@ static bool trans_jalr(DisasContext *ctx, 
>>>>> arg_jalr *a)
>>>>>   {
>>>>>       TCGLabel *misaligned = NULL;
>>>>>       TCGv target_pc = tcg_temp_new();
>>>>> +    TCGv succ_pc = dest_gpr(ctx, a->rd);
>>>>>         tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), 
>>>>> a->imm);
>>>>>       tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
>>>>> @@ -68,7 +71,9 @@ static bool trans_jalr(DisasContext *ctx, 
>>>>> arg_jalr *a)
>>>>>           tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
>>>>>       }
>>>>>   -    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
>>>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>>>> +    gen_set_gpr(ctx, a->rd, succ_pc);
>>>>> +
>>>>>       tcg_gen_mov_tl(cpu_pc, target_pc);
>>>>>       lookup_and_goto_ptr(ctx);
>>>>>   @@ -159,6 +164,7 @@ static bool gen_branch(DisasContext *ctx, 
>>>>> arg_b *a, TCGCond cond)
>>>>>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>>>>>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>>>>>       target_ulong next_pc;
>>>>> +    target_ulong orig_pc_save = ctx->pc_save;
>>>>>         if (get_xl(ctx) == MXL_RV128) {
>>>>>           TCGv src1h = get_gprh(ctx, a->rs1);
>>>>> @@ -175,6 +181,7 @@ static bool gen_branch(DisasContext *ctx, 
>>>>> arg_b *a, TCGCond cond)
>>>>>         gen_set_label(l); /* branch taken */
>>>>>   +    ctx->pc_save = orig_pc_save;
>>>>>       next_pc = ctx->base.pc_next + a->imm;
>>>>>       if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
>>>>>           /* misaligned */
>>>>> @@ -182,8 +189,9 @@ static bool gen_branch(DisasContext *ctx, 
>>>>> arg_b *a, TCGCond cond)
>>>>>           gen_pc_plus_diff(target_pc, ctx, next_pc);
>>>>>           gen_exception_inst_addr_mis(ctx, target_pc);
>>>>>       } else {
>>>>> -        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
>>>>> +        gen_goto_tb(ctx, 0, next_pc);
>>>>>       }
>>>>> +    ctx->pc_save = -1;
>>>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>>>         return true;
>>>>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>>>>> index d434fedb37..4623749602 100644
>>>>> --- a/target/riscv/translate.c
>>>>> +++ b/target/riscv/translate.c
>>>>> @@ -59,6 +59,7 @@ typedef struct DisasContext {
>>>>>       DisasContextBase base;
>>>>>       /* pc_succ_insn points to the instruction following 
>>>>> base.pc_next */
>>>>>       target_ulong pc_succ_insn;
>>>>> +    target_ulong pc_save;
>>>>>       target_ulong priv_ver;
>>>>>       RISCVMXL misa_mxl_max;
>>>>>       RISCVMXL xl;
>>>>> @@ -225,15 +226,24 @@ static void decode_save_opc(DisasContext *ctx)
>>>>>   static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
>>>>>                                 target_ulong dest)
>>>>>   {
>>>>> -    if (get_xl(ctx) == MXL_RV32) {
>>>>> -        dest = (int32_t)dest;
>>>>> +    assert(ctx->pc_save != -1);
>>>>> +    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>>>> +        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
>>>>> +        if (get_xl(ctx) == MXL_RV32) {
>>>>> +            tcg_gen_ext32s_tl(target, target);
>>>>> +        }
>>>>> +    } else {
>>>>> +        if (get_xl(ctx) == MXL_RV32) {
>>>>> +            dest = (int32_t)dest;
>>>>> +        }
>>>>> +        tcg_gen_movi_tl(target, dest);
>>>>>       }
>>>>> -    tcg_gen_movi_tl(target, dest);
>>>>>   }
>>>>>     static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
>>>>>   {
>>>>>       gen_pc_plus_diff(cpu_pc, ctx, dest);
>>>>> +    ctx->pc_save = dest;
>>>>>   }
>>>>>     static void generate_exception(DisasContext *ctx, int excp)
>>>>> @@ -287,8 +297,21 @@ static void gen_goto_tb(DisasContext *ctx, 
>>>>> int n, target_ulong dest)
>>>>>         * direct block chain benefits will be small.
>>>>>         */
>>>>>       if (translator_use_goto_tb(&ctx->base, dest) && 
>>>>> !ctx->itrigger) {
>>>>> -        tcg_gen_goto_tb(n);
>>>>> -        gen_set_pc_imm(ctx, dest);
>>>>> +        /*
>>>>> +         * For pcrel, the pc must always be up-to-date on entry to
>>>>> +         * the linked TB, so that it can use simple additions for 
>>>>> all
>>>>> +         * further adjustments.  For !pcrel, the linked TB is 
>>>>> compiled
>>>>> +         * to know its full virtual address, so we can delay the
>>>>> +         * update to pc to the unlinked path.  A long chain of links
>>>>> +         * can thus avoid many updates to the PC.
>>>>> +         */
>>>>> +        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
>>>>> +            gen_set_pc_imm(ctx, dest);
>>>>> +            tcg_gen_goto_tb(n);
>>>>> +        } else {
>>>>> +            tcg_gen_goto_tb(n);
>>>>> +            gen_set_pc_imm(ctx, dest);
>>>>> +        }
>>>>>           tcg_gen_exit_tb(ctx->base.tb, n);
>>>>>       } else {
>>>>>           gen_set_pc_imm(ctx, dest);
>>>>> @@ -543,6 +566,7 @@ static void gen_set_fpr_d(DisasContext *ctx, 
>>>>> int reg_num, TCGv_i64 t)
>>>>>   static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
>>>>>   {
>>>>>       target_ulong next_pc;
>>>>> +    TCGv succ_pc = dest_gpr(ctx, rd);
>>>>>         /* check misaligned: */
>>>>>       next_pc = ctx->base.pc_next + imm;
>>>>> @@ -555,8 +579,10 @@ static void gen_jal(DisasContext *ctx, int 
>>>>> rd, target_ulong imm)
>>>>>           }
>>>>>       }
>>>>>   -    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
>>>>> -    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use 
>>>>> this for safety */
>>>>> +    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
>>>>> +    gen_set_gpr(ctx, rd, succ_pc);
>>>>> +
>>>>> +    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
>>>>>       ctx->base.is_jmp = DISAS_NORETURN;
>>>>>   }
>>>>>   @@ -1150,6 +1176,7 @@ static void 
>>>>> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>>>>       RISCVCPU *cpu = RISCV_CPU(cs);
>>>>>       uint32_t tb_flags = ctx->base.tb->flags;
>>>>>   +    ctx->pc_save = ctx->base.pc_first;
>>>>>       ctx->pc_succ_insn = ctx->base.pc_first;
>>>>>       ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>>>>>       ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
>>>>> @@ -1195,8 +1222,13 @@ static void 
>>>>> riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
>>>>>   static void riscv_tr_insn_start(DisasContextBase *dcbase, 
>>>>> CPUState *cpu)
>>>>>   {
>>>>>       DisasContext *ctx = container_of(dcbase, DisasContext, base);
>>>>> +    target_ulong pc_next = ctx->base.pc_next;
>>>>> +
>>>>> +    if (tb_cflags(dcbase->tb) & CF_PCREL) {
>>>>> +        pc_next &= ~TARGET_PAGE_MASK;
>>>>> +    }
>>>>>   -    tcg_gen_insn_start(ctx->base.pc_next, 0);
>>>>> +    tcg_gen_insn_start(pc_next, 0);
>>>>>       ctx->insn_start = tcg_last_op();
>>>>>   }
Richard Henderson April 4, 2023, 1:56 p.m. UTC | #7
On 4/3/23 19:06, Weiwei Li wrote:
>   static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>   {
> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
> +    TCGv target_pc = dest_gpr(ctx, a->rd);
> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
> +    gen_set_gpr(ctx, a->rd, target_pc);
>       return true;
>   }

This is not how I expect a function called "pc plus diff" to work.
It should be simpler:


     TCGv rd = dest_gpr(ctx, a->rd);

     gen_pc_plus_diff(ctx, rd, a->imm);
     gen_set_gpr(ctx, a->rd, rd);

All of the manipulation of cpu_pc, pc_save, and pc_next are all hidden inside the 
function.  All that "add upper immediate to pc" should do is supply the immediate.


r~
Richard Henderson April 4, 2023, 2:05 p.m. UTC | #8
On 4/4/23 00:07, LIU Zhiwei wrote:
> Yes, I think so. I just suspect whether it is easy to read and verify the correctness. And 
> the maintenance for the future.
> 
> 
> 1) Maybe we should split the PCREL to a split patch set, as it is a new feature. The point 
> masking can still use this thread.

Yes.

> 2) For the new patch set for PCREL, process where we need to modify one by one. One clue 
> for recognize where to modify is the ctx pc related fields, such as 
> pc_next/pc_first/succ_insn_pc.
> 
> One thing may worth to try is that don't change the code in insn_trans/trans_X.  Just 
> rename the origin API we need to modify to a new name with _abs suffix. And and a 
> correspond set of API for PCREL with _pcrel suffix.
> 
> For example, in DisasContext, we define
> 
> void (*gen_set_gpri)(DisasContext *ctx, int reg_num, target_long imm);
> 
> In disas_init_fn,
> 
> if (tb_cflags(tb) & CF_PCREL) {
>      gen_set_gpri = gen_set_gpri_pcrel;
> } else {
>      gen_set_gpri = gen_set_gpri_abs;
> }
> 
> Thus we can write the code in trans_insn without think about the PCREL.

No.  Please have a look at how the other conversions have progressed.  E.g.

https://lore.kernel.org/qemu-devel/20220930220312.135327-1-richard.henderson@linaro.org/

Step by step, each of the internal translation functions is converted from absolute to 
relative values.  By operating on relative values, all knowledge of "pc" is centralized, 
and it simplifies the trans_* functions.


r~
Weiwei Li April 4, 2023, 2:33 p.m. UTC | #9
On 2023/4/4 21:56, Richard Henderson wrote:
> On 4/3/23 19:06, Weiwei Li wrote:
>>   static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>>   {
>> -    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
>> +    TCGv target_pc = dest_gpr(ctx, a->rd);
>> +    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
>> +    gen_set_gpr(ctx, a->rd, target_pc);
>>       return true;
>>   }
>
> This is not how I expect a function called "pc plus diff" to work.

Yeah, it's different from the similar function in ARM.

However, it's more in line with the original RISC-V logic.

Maybe we can change a name for the function, such as 
gen_pc_relative_address().

> It should be simpler:
>
>
>     TCGv rd = dest_gpr(ctx, a->rd);
>
>     gen_pc_plus_diff(ctx, rd, a->imm);
>     gen_set_gpr(ctx, a->rd, rd);
>
> All of the manipulation of cpu_pc, pc_save, and pc_next are all hidden 
> inside the function.  All that "add upper immediate to pc" should do 
> is supply the immediate.

If we want to hide all of them in gen_pc_plus_diff,  then we need 
calculate the diff for pc_succ_insn or introduce a new API for it, since 
we need get the successor pc in many instructions.

And the logic for gen_goto_tb or gen_set_pc_imm also need update.

Regards,

Weiwei Li

>
>
> r~
Richard Henderson April 4, 2023, 2:57 p.m. UTC | #10
On 4/4/23 07:33, liweiwei wrote:
> If we want to hide all of them in gen_pc_plus_diff,  then we need calculate the diff for 
> pc_succ_insn or introduce a new API for it, since we need get the successor pc in many 
> instructions.
> 
> And the logic for gen_goto_tb or gen_set_pc_imm also need update.

Yes, exactly.


r~
Weiwei Li April 4, 2023, 3:14 p.m. UTC | #11
On 2023/4/4 22:57, Richard Henderson wrote:
> On 4/4/23 07:33, liweiwei wrote:
>> If we want to hide all of them in gen_pc_plus_diff,  then we need 
>> calculate the diff for pc_succ_insn or introduce a new API for it, 
>> since we need get the successor pc in many instructions.
>>
>> And the logic for gen_goto_tb or gen_set_pc_imm also need update.
>
> Yes, exactly.
>
>
Sorry, I didn't find benefits from this. If we do this, we'll firstly 
calculate the diff = pc_succ_insn - pc_next, then we add it with pc_next 
- pc_save to get the relative address to env->pc.

This seems make things more complicated.

Regards,

Weiwei Li

> r~
Richard Henderson April 4, 2023, 3:27 p.m. UTC | #12
On 4/4/23 08:14, liweiwei wrote:
> 
> On 2023/4/4 22:57, Richard Henderson wrote:
>> On 4/4/23 07:33, liweiwei wrote:
>>> If we want to hide all of them in gen_pc_plus_diff,  then we need calculate the diff 
>>> for pc_succ_insn or introduce a new API for it, since we need get the successor pc in 
>>> many instructions.
>>>
>>> And the logic for gen_goto_tb or gen_set_pc_imm also need update.
>>
>> Yes, exactly.
>>
>>
> Sorry, I didn't find benefits from this. If we do this, we'll firstly calculate the diff = 
> pc_succ_insn - pc_next, then we add it with pc_next - pc_save to get the relative address 
> to env->pc.

It will me simpler because you'll move all of the calculations into a helper function.

The trans_* functions will be supplying a immediate directly:

   * for auipc, this is a->imm,
   * for jalr, this is 0.


r~
Weiwei Li April 4, 2023, 3:39 p.m. UTC | #13
On 2023/4/4 23:27, Richard Henderson wrote:
> On 4/4/23 08:14, liweiwei wrote:
>>
>> On 2023/4/4 22:57, Richard Henderson wrote:
>>> On 4/4/23 07:33, liweiwei wrote:
>>>> If we want to hide all of them in gen_pc_plus_diff,  then we need 
>>>> calculate the diff for pc_succ_insn or introduce a new API for it, 
>>>> since we need get the successor pc in many instructions.
>>>>
>>>> And the logic for gen_goto_tb or gen_set_pc_imm also need update.
>>>
>>> Yes, exactly.
>>>
>>>
>> Sorry, I didn't find benefits from this. If we do this, we'll firstly 
>> calculate the diff = pc_succ_insn - pc_next, then we add it with 
>> pc_next - pc_save to get the relative address to env->pc.
>
> It will me simpler because you'll move all of the calculations into a 
> helper function.
helper? Do you mean gen_pc_plus_diff?
>
> The trans_* functions will be supplying a immediate directly:
>
>   * for auipc, this is a->imm,
Yeah. this will be simpler in trans_, however the total calculation is 
the same. we just move a->imm + pc_next to gen_pc_plus_diff.
>   * for jalr, this is 0.

Not 0, but pc_succ_insn - pc_next. This may be the case in many place.

Regards,

Weiwei Li

>
>
> r~
diff mbox series

Patch

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1e97473af2..646fa31a59 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -658,16 +658,18 @@  static vaddr riscv_cpu_get_pc(CPUState *cs)
 static void riscv_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
 {
-    RISCVCPU *cpu = RISCV_CPU(cs);
-    CPURISCVState *env = &cpu->env;
-    RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
+    if (!(tb_cflags(tb) & CF_PCREL)) {
+        RISCVCPU *cpu = RISCV_CPU(cs);
+        CPURISCVState *env = &cpu->env;
+        RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
 
-    tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
+        tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
 
-    if (xl == MXL_RV32) {
-        env->pc = (int32_t) tb->pc;
-    } else {
-        env->pc = tb->pc;
+        if (xl == MXL_RV32) {
+            env->pc = (int32_t) tb->pc;
+        } else {
+            env->pc = tb->pc;
+        }
     }
 }
 
@@ -693,11 +695,18 @@  static void riscv_restore_state_to_opc(CPUState *cs,
     RISCVCPU *cpu = RISCV_CPU(cs);
     CPURISCVState *env = &cpu->env;
     RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
+    target_ulong pc;
+
+    if (tb_cflags(tb) & CF_PCREL) {
+        pc = (env->pc & TARGET_PAGE_MASK) | data[0];
+    } else {
+        pc = data[0];
+    }
 
     if (xl == MXL_RV32) {
-        env->pc = (int32_t)data[0];
+        env->pc = (int32_t)pc;
     } else {
-        env->pc = data[0];
+        env->pc = pc;
     }
     env->bins = data[1];
 }
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index cc72864d32..7cbbdac5aa 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -38,7 +38,9 @@  static bool trans_lui(DisasContext *ctx, arg_lui *a)
 
 static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
 {
-    gen_set_gpri(ctx, a->rd, a->imm + ctx->base.pc_next);
+    TCGv target_pc = dest_gpr(ctx, a->rd);
+    gen_pc_plus_diff(target_pc, ctx, a->imm + ctx->base.pc_next);
+    gen_set_gpr(ctx, a->rd, target_pc);
     return true;
 }
 
@@ -52,6 +54,7 @@  static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
 {
     TCGLabel *misaligned = NULL;
     TCGv target_pc = tcg_temp_new();
+    TCGv succ_pc = dest_gpr(ctx, a->rd);
 
     tcg_gen_addi_tl(target_pc, get_gpr(ctx, a->rs1, EXT_NONE), a->imm);
     tcg_gen_andi_tl(target_pc, target_pc, (target_ulong)-2);
@@ -68,7 +71,9 @@  static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
         tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
     }
 
-    gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn);
+    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
+    gen_set_gpr(ctx, a->rd, succ_pc);
+
     tcg_gen_mov_tl(cpu_pc, target_pc);
     lookup_and_goto_ptr(ctx);
 
@@ -159,6 +164,7 @@  static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
     TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
     TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
     target_ulong next_pc;
+    target_ulong orig_pc_save = ctx->pc_save;
 
     if (get_xl(ctx) == MXL_RV128) {
         TCGv src1h = get_gprh(ctx, a->rs1);
@@ -175,6 +181,7 @@  static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
 
     gen_set_label(l); /* branch taken */
 
+    ctx->pc_save = orig_pc_save;
     next_pc = ctx->base.pc_next + a->imm;
     if (!has_ext(ctx, RVC) && (next_pc & 0x3)) {
         /* misaligned */
@@ -182,8 +189,9 @@  static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
         gen_pc_plus_diff(target_pc, ctx, next_pc);
         gen_exception_inst_addr_mis(ctx, target_pc);
     } else {
-        gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
+        gen_goto_tb(ctx, 0, next_pc);
     }
+    ctx->pc_save = -1;
     ctx->base.is_jmp = DISAS_NORETURN;
 
     return true;
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index d434fedb37..4623749602 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -59,6 +59,7 @@  typedef struct DisasContext {
     DisasContextBase base;
     /* pc_succ_insn points to the instruction following base.pc_next */
     target_ulong pc_succ_insn;
+    target_ulong pc_save;
     target_ulong priv_ver;
     RISCVMXL misa_mxl_max;
     RISCVMXL xl;
@@ -225,15 +226,24 @@  static void decode_save_opc(DisasContext *ctx)
 static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
                               target_ulong dest)
 {
-    if (get_xl(ctx) == MXL_RV32) {
-        dest = (int32_t)dest;
+    assert(ctx->pc_save != -1);
+    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
+        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
+        if (get_xl(ctx) == MXL_RV32) {
+            tcg_gen_ext32s_tl(target, target);
+        }
+    } else {
+        if (get_xl(ctx) == MXL_RV32) {
+            dest = (int32_t)dest;
+        }
+        tcg_gen_movi_tl(target, dest);
     }
-    tcg_gen_movi_tl(target, dest);
 }
 
 static void gen_set_pc_imm(DisasContext *ctx, target_ulong dest)
 {
     gen_pc_plus_diff(cpu_pc, ctx, dest);
+    ctx->pc_save = dest;
 }
 
 static void generate_exception(DisasContext *ctx, int excp)
@@ -287,8 +297,21 @@  static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
       * direct block chain benefits will be small.
       */
     if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) {
-        tcg_gen_goto_tb(n);
-        gen_set_pc_imm(ctx, dest);
+        /*
+         * For pcrel, the pc must always be up-to-date on entry to
+         * the linked TB, so that it can use simple additions for all
+         * further adjustments.  For !pcrel, the linked TB is compiled
+         * to know its full virtual address, so we can delay the
+         * update to pc to the unlinked path.  A long chain of links
+         * can thus avoid many updates to the PC.
+         */
+        if (tb_cflags(ctx->base.tb) & CF_PCREL) {
+            gen_set_pc_imm(ctx, dest);
+            tcg_gen_goto_tb(n);
+        } else {
+            tcg_gen_goto_tb(n);
+            gen_set_pc_imm(ctx, dest);
+        }
         tcg_gen_exit_tb(ctx->base.tb, n);
     } else {
         gen_set_pc_imm(ctx, dest);
@@ -543,6 +566,7 @@  static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
 static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
 {
     target_ulong next_pc;
+    TCGv succ_pc = dest_gpr(ctx, rd);
 
     /* check misaligned: */
     next_pc = ctx->base.pc_next + imm;
@@ -555,8 +579,10 @@  static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
         }
     }
 
-    gen_set_gpri(ctx, rd, ctx->pc_succ_insn);
-    gen_goto_tb(ctx, 0, ctx->base.pc_next + imm); /* must use this for safety */
+    gen_pc_plus_diff(succ_pc, ctx, ctx->pc_succ_insn);
+    gen_set_gpr(ctx, rd, succ_pc);
+
+    gen_goto_tb(ctx, 0, next_pc); /* must use this for safety */
     ctx->base.is_jmp = DISAS_NORETURN;
 }
 
@@ -1150,6 +1176,7 @@  static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     RISCVCPU *cpu = RISCV_CPU(cs);
     uint32_t tb_flags = ctx->base.tb->flags;
 
+    ctx->pc_save = ctx->base.pc_first;
     ctx->pc_succ_insn = ctx->base.pc_first;
     ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
     ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
@@ -1195,8 +1222,13 @@  static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
 static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
+    target_ulong pc_next = ctx->base.pc_next;
+
+    if (tb_cflags(dcbase->tb) & CF_PCREL) {
+        pc_next &= ~TARGET_PAGE_MASK;
+    }
 
-    tcg_gen_insn_start(ctx->base.pc_next, 0);
+    tcg_gen_insn_start(pc_next, 0);
     ctx->insn_start = tcg_last_op();
 }