Message ID | 20230518135757.1442654-3-fei2.wu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | TCG code quality tracking | expand |
On 5/18/23 06:57, Fei Wu wrote: > +void HELPER(inc_exec_freq)(void *ptr) > +{ > + TBStatistics *stats = (TBStatistics *) ptr; > + tcg_debug_assert(stats); > + ++stats->executions.normal; > +} ... > +static inline void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_temp_new_ptr(); > + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); > + gen_helper_inc_exec_freq(ptr); > + } > +} This is 3 host instructions, easily expanded inline: --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -11,6 +11,7 @@ #include "qemu/error-report.h" #include "tcg/tcg.h" #include "tcg/tcg-op.h" +#include "tcg/tcg-temp-internal.h" #include "exec/exec-all.h" #include "exec/gen-icount.h" #include "exec/log.h" @@ -18,6 +19,30 @@ #include "exec/plugin-gen.h" #include "exec/replay-core.h" + +static void gen_tb_exec_count(TranslationBlock *tb) +{ + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); + + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); + if (sizeof(tb->tb_stats->executions.normal) == 4) { + TCGv_i32 t = tcg_temp_ebb_new_i32(); + tcg_gen_ld_i32(t, ptr, 0); + tcg_gen_addi_i32(t, t, 1); + tcg_gen_st_i32(t, ptr, 0); + tcg_temp_free_i32(t); + } else { + TCGv_i64 t = tcg_temp_ebb_new_i64(); + tcg_gen_ld_i64(t, ptr, 0); + tcg_gen_addi_i64(t, t, 1); + tcg_gen_st_i64(t, ptr, 0); + tcg_temp_free_i64(t); + } + tcg_temp_free_ptr(ptr); + } +} + bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) { /* Suppress goto_tb if requested. */ I'm not especially keen on embedding the TBStatistics pointer directly like this; for most hosts we will have to put this constant into the constant pool. Whereas the pointer already exists at tb->tb_stats, and tb is at a constant displacement prior to the code, so we already have mechanisms for generating pc-relative addresses. However, that's premature optimization. Let's get it working first. r~
On 5/23/2023 8:45 AM, Richard Henderson wrote: > On 5/18/23 06:57, Fei Wu wrote: >> +void HELPER(inc_exec_freq)(void *ptr) >> +{ >> + TBStatistics *stats = (TBStatistics *) ptr; >> + tcg_debug_assert(stats); >> + ++stats->executions.normal; >> +} > ... >> +static inline void gen_tb_exec_count(TranslationBlock *tb) >> +{ >> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >> + TCGv_ptr ptr = tcg_temp_new_ptr(); >> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >> + gen_helper_inc_exec_freq(ptr); >> + } >> +} > > This is 3 host instructions, easily expanded inline: > > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -11,6 +11,7 @@ > #include "qemu/error-report.h" > #include "tcg/tcg.h" > #include "tcg/tcg-op.h" > +#include "tcg/tcg-temp-internal.h" > #include "exec/exec-all.h" > #include "exec/gen-icount.h" > #include "exec/log.h" > @@ -18,6 +19,30 @@ > #include "exec/plugin-gen.h" > #include "exec/replay-core.h" > > + > +static void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); > + > + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); > + if (sizeof(tb->tb_stats->executions.normal) == 4) { > + TCGv_i32 t = tcg_temp_ebb_new_i32(); > + tcg_gen_ld_i32(t, ptr, 0); > + tcg_gen_addi_i32(t, t, 1); > + tcg_gen_st_i32(t, ptr, 0); > + tcg_temp_free_i32(t); > + } else { > + TCGv_i64 t = tcg_temp_ebb_new_i64(); > + tcg_gen_ld_i64(t, ptr, 0); > + tcg_gen_addi_i64(t, t, 1); > + tcg_gen_st_i64(t, ptr, 0); > + tcg_temp_free_i64(t); > + } > + tcg_temp_free_ptr(ptr); > + } > +} > + Thank you for the method, I will try it and measure the gain, it is indeed the hot path and usually takes a lot of time. Thanks, Fei. > bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) > { > /* Suppress goto_tb if requested. 
*/ > > > I'm not expecially keen on embedding the TBStatistics pointer directly > like this; for most hosts we will have to put this constant into the > constant pool. Whereas the pointer already exists at tb->tb_stats, and > tb is at a constant displacement prior to the code, so we already have > mechanisms for generating pc-relative addresses. > > However, that's premature optimization. Let's get it working first. > > > r~ >
On 5/23/2023 8:45 AM, Richard Henderson wrote: > On 5/18/23 06:57, Fei Wu wrote: >> +void HELPER(inc_exec_freq)(void *ptr) >> +{ >> + TBStatistics *stats = (TBStatistics *) ptr; >> + tcg_debug_assert(stats); >> + ++stats->executions.normal; >> +} > ... >> +static inline void gen_tb_exec_count(TranslationBlock *tb) >> +{ >> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >> + TCGv_ptr ptr = tcg_temp_new_ptr(); >> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >> + gen_helper_inc_exec_freq(ptr); >> + } >> +} > > This is 3 host instructions, easily expanded inline: > > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -11,6 +11,7 @@ > #include "qemu/error-report.h" > #include "tcg/tcg.h" > #include "tcg/tcg-op.h" > +#include "tcg/tcg-temp-internal.h" > #include "exec/exec-all.h" > #include "exec/gen-icount.h" > #include "exec/log.h" > @@ -18,6 +19,30 @@ > #include "exec/plugin-gen.h" > #include "exec/replay-core.h" > > + > +static void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); > + > + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); > + if (sizeof(tb->tb_stats->executions.normal) == 4) { > + TCGv_i32 t = tcg_temp_ebb_new_i32(); > + tcg_gen_ld_i32(t, ptr, 0); > + tcg_gen_addi_i32(t, t, 1); > + tcg_gen_st_i32(t, ptr, 0); > + tcg_temp_free_i32(t); > + } else { > + TCGv_i64 t = tcg_temp_ebb_new_i64(); > + tcg_gen_ld_i64(t, ptr, 0); > + tcg_gen_addi_i64(t, t, 1); > + tcg_gen_st_i64(t, ptr, 0); > + tcg_temp_free_i64(t); > + } > + tcg_temp_free_ptr(ptr); > + } > +} > + > bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) > { > /* Suppress goto_tb if requested. */ > > > I'm not expecially keen on embedding the TBStatistics pointer directly > like this; for most hosts we will have to put this constant into the > constant pool. 
Whereas the pointer already exists at tb->tb_stats, and > tb is at a constant displacement prior to the code, so we already have > mechanisms for generating pc-relative addresses. > > However, that's premature optimization. Let's get it working first. > Here are the coremark results on a 4-core qemu-system-riscv64; coremark uses 1 CPU in this test. (tb_stats stop) Iterations/Sec : 5358.012664 (tb_stats start all) helper - qatomic_inc(normal) Iterations/Sec : 2416.626390 helper - ++normal Iterations/Sec : 4307.559767 no helper - inline add Iterations/Sec : 5168.930031 Also, if coremark runs on 4 CPUs, tb_stats will cost much more even in the inline case; it's an extreme case though. Thanks, Fei. > > r~ >
On 5/23/2023 8:45 AM, Richard Henderson wrote: > On 5/18/23 06:57, Fei Wu wrote: >> +void HELPER(inc_exec_freq)(void *ptr) >> +{ >> + TBStatistics *stats = (TBStatistics *) ptr; >> + tcg_debug_assert(stats); >> + ++stats->executions.normal; >> +} > ... >> +static inline void gen_tb_exec_count(TranslationBlock *tb) >> +{ >> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >> + TCGv_ptr ptr = tcg_temp_new_ptr(); >> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >> + gen_helper_inc_exec_freq(ptr); >> + } >> +} > > This is 3 host instructions, easily expanded inline: > > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -11,6 +11,7 @@ > #include "qemu/error-report.h" > #include "tcg/tcg.h" > #include "tcg/tcg-op.h" > +#include "tcg/tcg-temp-internal.h" > #include "exec/exec-all.h" > #include "exec/gen-icount.h" > #include "exec/log.h" > @@ -18,6 +19,30 @@ > #include "exec/plugin-gen.h" > #include "exec/replay-core.h" > > + > +static void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); > + > + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); > + if (sizeof(tb->tb_stats->executions.normal) == 4) { > + TCGv_i32 t = tcg_temp_ebb_new_i32(); > + tcg_gen_ld_i32(t, ptr, 0); > + tcg_gen_addi_i32(t, t, 1); > + tcg_gen_st_i32(t, ptr, 0); > + tcg_temp_free_i32(t); > + } else { > + TCGv_i64 t = tcg_temp_ebb_new_i64(); > + tcg_gen_ld_i64(t, ptr, 0); > + tcg_gen_addi_i64(t, t, 1); > + tcg_gen_st_i64(t, ptr, 0); > + tcg_temp_free_i64(t); > + } > + tcg_temp_free_ptr(ptr); > + } > +} > + > bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) > { > /* Suppress goto_tb if requested. */ > > > I'm not expecially keen on embedding the TBStatistics pointer directly > like this; for most hosts we will have to put this constant into the > constant pool. 
Whereas the pointer already exists at tb->tb_stats, and > tb is at a constant displacement prior to the code, so we already have > mechanisms for generating pc-relative addresses. > > However, that's premature optimization. Let's get it working first. > Richard, have you reviewed the whole series? I will integrate your change to next version. Thanks, Fei. > > r~ >
On 5/24/23 06:35, Wu, Fei wrote: > On 5/23/2023 8:45 AM, Richard Henderson wrote: >> On 5/18/23 06:57, Fei Wu wrote: >>> +void HELPER(inc_exec_freq)(void *ptr) >>> +{ >>> + TBStatistics *stats = (TBStatistics *) ptr; >>> + tcg_debug_assert(stats); >>> + ++stats->executions.normal; >>> +} >> ... >>> +static inline void gen_tb_exec_count(TranslationBlock *tb) >>> +{ >>> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >>> + TCGv_ptr ptr = tcg_temp_new_ptr(); >>> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >>> + gen_helper_inc_exec_freq(ptr); >>> + } >>> +} >> >> This is 3 host instructions, easily expanded inline: >> >> --- a/accel/tcg/translator.c >> +++ b/accel/tcg/translator.c >> @@ -11,6 +11,7 @@ >> #include "qemu/error-report.h" >> #include "tcg/tcg.h" >> #include "tcg/tcg-op.h" >> +#include "tcg/tcg-temp-internal.h" >> #include "exec/exec-all.h" >> #include "exec/gen-icount.h" >> #include "exec/log.h" >> @@ -18,6 +19,30 @@ >> #include "exec/plugin-gen.h" >> #include "exec/replay-core.h" >> >> + >> +static void gen_tb_exec_count(TranslationBlock *tb) >> +{ >> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >> + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); >> + >> + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); >> + if (sizeof(tb->tb_stats->executions.normal) == 4) { >> + TCGv_i32 t = tcg_temp_ebb_new_i32(); >> + tcg_gen_ld_i32(t, ptr, 0); >> + tcg_gen_addi_i32(t, t, 1); >> + tcg_gen_st_i32(t, ptr, 0); >> + tcg_temp_free_i32(t); >> + } else { >> + TCGv_i64 t = tcg_temp_ebb_new_i64(); >> + tcg_gen_ld_i64(t, ptr, 0); >> + tcg_gen_addi_i64(t, t, 1); >> + tcg_gen_st_i64(t, ptr, 0); >> + tcg_temp_free_i64(t); >> + } >> + tcg_temp_free_ptr(ptr); >> + } >> +} >> + >> bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) >> { >> /* Suppress goto_tb if requested. */ >> >> >> I'm not expecially keen on embedding the TBStatistics pointer directly >> like this; for most hosts we will have to put this constant into the >> constant pool. 
Whereas the pointer already exists at tb->tb_stats, and >> tb is at a constant displacement prior to the code, so we already have >> mechanisms for generating pc-relative addresses. >> >> However, that's premature optimization. Let's get it working first. >> > Richard, have you reviewed the whole series? I will integrate your > change to next version. No, it's difficult to see what's going on. In your next revision, please remove CONFIG_PROFILER entirely first, which was what I was planning to do locally. r~
On 5/25/2023 1:02 AM, Richard Henderson wrote: > On 5/24/23 06:35, Wu, Fei wrote: >> On 5/23/2023 8:45 AM, Richard Henderson wrote: >>> On 5/18/23 06:57, Fei Wu wrote: >>>> +void HELPER(inc_exec_freq)(void *ptr) >>>> +{ >>>> + TBStatistics *stats = (TBStatistics *) ptr; >>>> + tcg_debug_assert(stats); >>>> + ++stats->executions.normal; >>>> +} >>> ... >>>> +static inline void gen_tb_exec_count(TranslationBlock *tb) >>>> +{ >>>> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >>>> + TCGv_ptr ptr = tcg_temp_new_ptr(); >>>> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >>>> + gen_helper_inc_exec_freq(ptr); >>>> + } >>>> +} >>> >>> This is 3 host instructions, easily expanded inline: >>> >>> --- a/accel/tcg/translator.c >>> +++ b/accel/tcg/translator.c >>> @@ -11,6 +11,7 @@ >>> #include "qemu/error-report.h" >>> #include "tcg/tcg.h" >>> #include "tcg/tcg-op.h" >>> +#include "tcg/tcg-temp-internal.h" >>> #include "exec/exec-all.h" >>> #include "exec/gen-icount.h" >>> #include "exec/log.h" >>> @@ -18,6 +19,30 @@ >>> #include "exec/plugin-gen.h" >>> #include "exec/replay-core.h" >>> >>> + >>> +static void gen_tb_exec_count(TranslationBlock *tb) >>> +{ >>> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >>> + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); >>> + >>> + tcg_gen_movi_ptr(ptr, >>> (intptr_t)&tb->tb_stats->executions.normal); >>> + if (sizeof(tb->tb_stats->executions.normal) == 4) { >>> + TCGv_i32 t = tcg_temp_ebb_new_i32(); >>> + tcg_gen_ld_i32(t, ptr, 0); >>> + tcg_gen_addi_i32(t, t, 1); >>> + tcg_gen_st_i32(t, ptr, 0); >>> + tcg_temp_free_i32(t); >>> + } else { >>> + TCGv_i64 t = tcg_temp_ebb_new_i64(); >>> + tcg_gen_ld_i64(t, ptr, 0); >>> + tcg_gen_addi_i64(t, t, 1); >>> + tcg_gen_st_i64(t, ptr, 0); >>> + tcg_temp_free_i64(t); >>> + } >>> + tcg_temp_free_ptr(ptr); >>> + } >>> +} >>> + >>> bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) >>> { >>> /* Suppress goto_tb if requested. 
*/ >>> >>> >>> I'm not expecially keen on embedding the TBStatistics pointer directly >>> like this; for most hosts we will have to put this constant into the >>> constant pool. Whereas the pointer already exists at tb->tb_stats, and >>> tb is at a constant displacement prior to the code, so we already have >>> mechanisms for generating pc-relative addresses. >>> >>> However, that's premature optimization. Let's get it working first. >>> >> Richard, have you reviewed the whole series? I will integrate your >> change to next version. > > No, it's difficult to see what's going on. > In your next revision, please remove CONFIG_PROFILER entirely first, > which was what I was planning to do locally. > OK, I will update it. Thanks, Fei. > > r~
On 5/25/2023 1:02 AM, Richard Henderson wrote: > On 5/24/23 06:35, Wu, Fei wrote: >> On 5/23/2023 8:45 AM, Richard Henderson wrote: >>> On 5/18/23 06:57, Fei Wu wrote: >>>> +void HELPER(inc_exec_freq)(void *ptr) >>>> +{ >>>> + TBStatistics *stats = (TBStatistics *) ptr; >>>> + tcg_debug_assert(stats); >>>> + ++stats->executions.normal; >>>> +} >>> ... >>>> +static inline void gen_tb_exec_count(TranslationBlock *tb) >>>> +{ >>>> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >>>> + TCGv_ptr ptr = tcg_temp_new_ptr(); >>>> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >>>> + gen_helper_inc_exec_freq(ptr); >>>> + } >>>> +} >>> >>> This is 3 host instructions, easily expanded inline: >>> >>> --- a/accel/tcg/translator.c >>> +++ b/accel/tcg/translator.c >>> @@ -11,6 +11,7 @@ >>> #include "qemu/error-report.h" >>> #include "tcg/tcg.h" >>> #include "tcg/tcg-op.h" >>> +#include "tcg/tcg-temp-internal.h" >>> #include "exec/exec-all.h" >>> #include "exec/gen-icount.h" >>> #include "exec/log.h" >>> @@ -18,6 +19,30 @@ >>> #include "exec/plugin-gen.h" >>> #include "exec/replay-core.h" >>> >>> + >>> +static void gen_tb_exec_count(TranslationBlock *tb) >>> +{ >>> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >>> + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); >>> + >>> + tcg_gen_movi_ptr(ptr, >>> (intptr_t)&tb->tb_stats->executions.normal); >>> + if (sizeof(tb->tb_stats->executions.normal) == 4) { >>> + TCGv_i32 t = tcg_temp_ebb_new_i32(); >>> + tcg_gen_ld_i32(t, ptr, 0); >>> + tcg_gen_addi_i32(t, t, 1); >>> + tcg_gen_st_i32(t, ptr, 0); >>> + tcg_temp_free_i32(t); >>> + } else { >>> + TCGv_i64 t = tcg_temp_ebb_new_i64(); >>> + tcg_gen_ld_i64(t, ptr, 0); >>> + tcg_gen_addi_i64(t, t, 1); >>> + tcg_gen_st_i64(t, ptr, 0); >>> + tcg_temp_free_i64(t); >>> + } >>> + tcg_temp_free_ptr(ptr); >>> + } >>> +} >>> + >>> bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) >>> { >>> /* Suppress goto_tb if requested. 
*/ >>> >>> >>> I'm not expecially keen on embedding the TBStatistics pointer directly >>> like this; for most hosts we will have to put this constant into the >>> constant pool. Whereas the pointer already exists at tb->tb_stats, and >>> tb is at a constant displacement prior to the code, so we already have >>> mechanisms for generating pc-relative addresses. >>> >>> However, that's premature optimization. Let's get it working first. >>> >> Richard, have you reviewed the whole series? I will integrate your >> change to next version. > > No, it's difficult to see what's going on. > In your next revision, please remove CONFIG_PROFILER entirely first, > which was what I was planning to do locally. > This will change the series dramatically: the patches probably need to be reorganized, a lot of rebases are required, and some functions such as tcg_dump_op_count, which is in the scope of CONFIG_PROFILER, will be removed first and then added back. It takes time; I will try to send it out ASAP. Thanks, Fei. > > r~
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index bc0e1c3299..a3f24c62a0 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -25,6 +25,7 @@ #include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" +#include "exec/tb-stats.h" #include "tcg/tcg.h" #include "qemu/atomic.h" #include "qemu/rcu.h" @@ -564,7 +565,12 @@ void cpu_exec_step_atomic(CPUState *cpu) mmap_unlock(); } + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { + tb->tb_stats->executions.atomic++; + } + cpu_exec_enter(cpu); + /* execute the generated code */ trace_exec_tb(tb, pc); cpu_tb_exec(cpu, tb, &tb_exit); diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c index f988bd8a31..143a52ef5c 100644 --- a/accel/tcg/tb-stats.c +++ b/accel/tcg/tb-stats.c @@ -22,6 +22,7 @@ enum TBStatsStatus { }; static enum TBStatsStatus tcg_collect_tb_stats; +static uint32_t default_tbstats_flag; void init_tb_stats_htable(void) { @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void) { return tcg_collect_tb_stats == TB_STATS_PAUSED; } + +uint32_t get_default_tbstats_flag(void) +{ + return default_tbstats_flag; +} diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index e4e030043f..85cb795732 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,6 +27,7 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "exec/tb-stats.h" #include "disas/disas.h" #include "exec/log.h" #include "tcg/tcg.h" @@ -148,3 +149,10 @@ void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(env_cpu(env), GETPC()); } + +void HELPER(inc_exec_freq)(void *ptr) +{ + TBStatistics *stats = (TBStatistics *) ptr; + tcg_debug_assert(stats); + ++stats->executions.normal; +} diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 6f8c2061d0..8d310e39c0 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -41,6 +41,7 @@ DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr) DEF_HELPER_FLAGS_3(ld_i128, 
TCG_CALL_NO_WG, i128, env, i64, i32) DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32) +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr) DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, i64, i32, i32, i32) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a5ebc5e9e2..31d4ddf4ab 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -312,6 +312,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc, new_stats->pc = pc; new_stats->cs_base = cs_base; new_stats->flags = flags; + new_stats->stats_enabled = get_default_tbstats_flag(); /* * All initialisation must be complete before we insert into qht @@ -408,9 +409,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, /* * We want to fetch the stats structure before we start code * generation so we can count interesting things about this - * generation. + * generation. If dfilter is in effect we will only collect stats + * for the specified range. */ - if (tb_stats_collection_enabled()) { + if (tb_stats_collection_enabled() && + qemu_log_in_addr_range(tb->pc)) { tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); } else { tb->tb_stats = NULL; diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 7bda43ff61..165072c8c3 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -18,6 +18,15 @@ #include "exec/plugin-gen.h" #include "exec/replay-core.h" +static inline void gen_tb_exec_count(TranslationBlock *tb) +{ + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { + TCGv_ptr ptr = tcg_temp_new_ptr(); + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); + gen_helper_inc_exec_freq(ptr); + } +} + bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) { /* Suppress goto_tb if requested. */ @@ -56,6 +65,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, /* Start translating. 
*/ gen_tb_start(db->tb); + gen_tb_exec_count(tb); ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index f6de79a6b4..20e7835ff0 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -2,6 +2,7 @@ #define GEN_ICOUNT_H #include "exec/exec-all.h" +#include "exec/tb-stats.h" /* Helpers for instruction counting code generation. */ diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h index 87ee3d902e..fa71eb6f0c 100644 --- a/include/exec/tb-stats-flags.h +++ b/include/exec/tb-stats-flags.h @@ -11,6 +11,9 @@ #ifndef TB_STATS_FLAGS #define TB_STATS_FLAGS +#define TB_NOTHING (1 << 0) +#define TB_EXEC_STATS (1 << 1) + /* TBStatistic collection controls */ void enable_collect_tb_stats(void); void disable_collect_tb_stats(void); @@ -18,4 +21,6 @@ void pause_collect_tb_stats(void); bool tb_stats_collection_enabled(void); bool tb_stats_collection_paused(void); +uint32_t get_default_tbstats_flag(void); + #endif diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h index b519465665..eb1fa92a4e 100644 --- a/include/exec/tb-stats.h +++ b/include/exec/tb-stats.h @@ -31,6 +31,9 @@ #include "exec/tb-stats-flags.h" #include "tcg/tcg.h" +#define tb_stats_enabled(tb, JIT_STATS) \ + (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS)) + typedef struct TBStatistics TBStatistics; /* @@ -47,6 +50,16 @@ struct TBStatistics { uint32_t flags; /* cs_base isn't included in the hash but we do check for matches */ target_ulong cs_base; + + /* which stats are enabled for this TBStats */ + uint32_t stats_enabled; + + /* Execution stats */ + struct { + unsigned long normal; + unsigned long atomic; + } executions; + }; bool tb_stats_cmp(const void *ap, const void *bp);