@@ -16,6 +16,7 @@
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "tcg/tcg.h"
+#include "exec/tb-stats.h"
#include "internal.h"
@@ -70,6 +71,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
+ if (!tb_stats_collection_enabled()) {
+ error_setg(errp, "TB information not being recorded");
+ return NULL;
+ }
+
if (!tcg_enabled()) {
error_setg(errp,
"Opcode count information is only available with accel=tcg");
@@ -9,6 +9,10 @@
#include "qemu/osdep.h"
#include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
#include "exec/tb-stats.h"
#include "tb-context.h"
@@ -22,6 +26,71 @@ enum TBStatsStatus {
static enum TBStatsStatus tcg_collect_tb_stats;
static uint32_t tbstats_flag;
+struct jit_profile_info { /* totals gathered across all TBStatistics entries */
+ uint64_t translations; /* sum of translations.total */
+ uint64_t aborted; /* not updated by collect_jit_profile_info() */
+ uint64_t ops; /* sum of code.num_tcg_ops */
+ unsigned ops_max; /* largest per-translation mean of code.num_tcg_ops */
+ uint64_t del_ops; /* sum of code.deleted_ops */
+ uint64_t temps; /* sum of code.temps */
+ unsigned temps_max; /* largest per-translation mean of code.temps */
+ uint64_t host; /* sum of code.out_len */
+ uint64_t guest; /* sum of code.in_len */
+ uint64_t search_data; /* sum of code.search_out_len */
+};
+
+#define stat_per_translation(stat, name) /* mean per translation, or 0 */ \
+    ((stat)->translations.total ? (stat)->name / (stat)->translations.total : 0)
+
+/* qht_iter() callback: fold one TBStatistics (@p) into the totals (@userp) */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+    TBStatistics *tbs = p;
+    struct jit_profile_info *jpi = userp;
+    /* per-translation means for this entry; 0 if it was never translated */
+    unsigned long ops_avg = stat_per_translation(tbs, code.num_tcg_ops);
+    unsigned long temps_avg = stat_per_translation(tbs, code.temps);
+
+    jpi->translations += tbs->translations.total;
+    jpi->ops += tbs->code.num_tcg_ops;
+    jpi->del_ops += tbs->code.deleted_ops;
+    jpi->temps += tbs->code.temps;
+    jpi->host += tbs->code.out_len;
+    jpi->guest += tbs->code.in_len;
+    jpi->search_data += tbs->code.search_out_len;
+    /* remember the largest mean seen so far across all entries */
+    jpi->ops_max = ops_avg > jpi->ops_max ? ops_avg : jpi->ops_max;
+    jpi->temps_max = temps_avg > jpi->temps_max ? temps_avg : jpi->temps_max;
+}
+
+/* Append aggregate JIT statistics from every TBStatistics entry to @buf */
+void dump_jit_profile_info(GString *buf)
+{
+    struct jit_profile_info jpi = { 0 }; /* small: no heap allocation needed */
+
+    if (!tb_stats_collection_enabled()) {
+        return;
+    }
+
+    qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, &jpi);
+
+    /* Nothing translated yet: print nothing rather than divide by zero */
+    if (jpi.translations) {
+        g_string_append_printf(buf, "translated TBs %" PRIu64 "\n",
+                jpi.translations);
+        g_string_append_printf(buf, "avg ops/TB %0.1f max=%u\n",
+                jpi.ops / (double) jpi.translations, jpi.ops_max);
+        g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
+                jpi.del_ops / (double) jpi.translations);
+        g_string_append_printf(buf, "avg temps/TB %0.2f max=%u\n",
+                jpi.temps / (double) jpi.translations, jpi.temps_max);
+        g_string_append_printf(buf, "avg host code/TB %0.1f\n",
+                jpi.host / (double) jpi.translations);
+        g_string_append_printf(buf, "avg search data/TB %0.1f\n",
+                jpi.search_data / (double) jpi.translations);
+    }
+}
+
void init_tb_stats_htable(void)
{
if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
@@ -266,7 +266,7 @@ void page_init(void)
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
vaddr pc, void *host_pc,
- int *max_insns, int64_t *ti)
+ int *max_insns)
{
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(ret != 0)) {
@@ -294,6 +294,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
new_stats->pc = pc;
new_stats->cs_base = cs_base;
new_stats->flags = flags;
+ new_stats->tbs = g_ptr_array_new();
+ qemu_mutex_init(&new_stats->jit_stats_lock);
/*
* All initialisation must be complete before we insert into qht
@@ -307,6 +309,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
* If there is already a TBStatistic for this TB from a previous flush
* then just make the new TB point to the older TBStatistic
*/
+ g_ptr_array_free(new_stats->tbs, true);
g_free(new_stats);
return existing_stats;
} else {
@@ -325,7 +328,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_page_addr_t phys_pc;
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns;
- int64_t ti;
void *host_pc;
assert_memory_lock();
@@ -399,7 +401,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->tb_stats = NULL;
}
- gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+ gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns);
if (unlikely(gen_code_size < 0)) {
switch (gen_code_size) {
case -1:
@@ -553,6 +555,29 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
return tb;
}
+ /*
+ * Collect JIT stats when enabled. We batch them all up here to
+ * avoid spamming the cache with atomic accesses
+ */
+ if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ TBStatistics *ts = tb->tb_stats;
+ qemu_mutex_lock(&ts->jit_stats_lock);
+
+ ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+ ts->code.in_len += tb->size;
+ ts->code.out_len += tb->tc.size;
+ ts->code.search_out_len += search_size;
+
+ ts->translations.total++;
+ if (tb_page_addr1(tb) != -1) {
+ ts->translations.spanning++;
+ }
+
+ g_ptr_array_add(ts->tbs, tb);
+
+ qemu_mutex_unlock(&ts->jit_stats_lock);
+ }
+
/*
* Insert TB into the corresponding region tree before publishing it
* through QHT. Otherwise rewinding happened in the TB might fail to
@@ -248,6 +248,11 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
tb->size = db->pc_next - db->pc_first;
tb->icount = db->num_insns;
+ /* Save number of guest instructions for TB_JIT_STATS */
+ if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ tb->tb_stats->code.num_guest_inst += db->num_insns;
+ }
+
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) {
FILE *logfile = qemu_log_trylock();
@@ -13,6 +13,7 @@
#define TB_NONE_STATS (0) /* no stats */
#define TB_EXEC_STATS (1 << 0)
+#define TB_JIT_STATS (1 << 1) /* per-TB translation (JIT) stats */
/* TBStatistic collection controls */
void enable_collect_tb_stats(void);
@@ -53,6 +53,34 @@ struct TBStatistics {
unsigned long normal;
unsigned long atomic;
} executions;
+
+ /* JIT Stats - protected by lock */
+ QemuMutex jit_stats_lock;
+
+ /* Sum of all operations for all translations */
+ struct {
+ unsigned num_guest_inst;
+ unsigned num_tcg_ops;
+ unsigned num_tcg_ops_opt;
+ unsigned spills;
+
+ unsigned temps;
+ unsigned deleted_ops;
+ unsigned in_len;
+ unsigned out_len;
+ unsigned search_out_len;
+ } code;
+
+ struct {
+ unsigned long total;
+ unsigned long spanning;
+ } translations;
+
+ /*
+ * All persistent (cached) TranslationBlocks using
+ * this TBStats structure. Has to be reset on a tb_flush.
+ */
+ GPtrArray *tbs;
};
bool tb_stats_cmp(const void *ap, const void *bp);
@@ -60,4 +88,6 @@ bool tb_stats_cmp(const void *ap, const void *bp);
void init_tb_stats_htable(void);
bool tb_stats_enabled(TranslationBlock *tb, uint32_t flag);
+void dump_jit_profile_info(GString *buf);
+
#endif
@@ -478,6 +478,14 @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
}
+/*
+ * TCGProfile: counters kept in each TCGContext for the translator's lifetime.
+ */
+typedef struct TCGProfile {
+ /* Ops seen per opcode; bumped in tcg_gen_code() while TB stats are on */
+ size_t table_op_count[NB_OPS]; /* indexed by TCGOpcode */
+} TCGProfile;
+
struct TCGContext {
uint8_t *pool_cur, *pool_end;
TCGPool *pool_first, *pool_current, *pool_first_large;
@@ -507,6 +515,8 @@ struct TCGContext {
tcg_insn_unit *code_buf; /* pointer for start of tb */
tcg_insn_unit *code_ptr; /* pointer for running end of tb */
+ TCGProfile prof;
+
#ifdef CONFIG_DEBUG_TCG
int goto_tb_issue_mask;
const TCGOpcode *vecop_list;
@@ -36,6 +36,7 @@
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
+#include "exec/tb-stats.h"
#include "tcg/tcg-op-common.h"
#if UINTPTR_MAX == UINT32_MAX
@@ -3033,6 +3034,9 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
QTAILQ_REMOVE(&s->ops, op, link);
QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
s->nb_ops--;
+ if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
+ s->gen_tb->tb_stats->code.deleted_ops++;
+ }
}
void tcg_remove_ops_after(TCGOp *op)
@@ -4198,6 +4202,10 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
}
/* We must spill something. */
+ if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
+ s->gen_tb->tb_stats->code.spills++;
+ }
+
for (j = f; j < 2; j++) {
TCGRegSet set = reg_ct[j];
@@ -5902,9 +5910,17 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
-void tcg_dump_op_count(GString *buf)
+static void collect_tcg_profiler(TCGProfile *prof)
{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+    unsigned int i, j;
+
+    for (i = 0; i < n_ctxs; i++) { /* add every context's counts into @prof */
+        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+        for (j = 0; j < NB_OPS; j++) { /* own index: must not clobber i */
+            prof->table_op_count[j] += s->prof.table_op_count[j];
+        }
+    }
}
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
@@ -5912,6 +5928,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
int i, start_words, num_insns;
TCGOp *op;
+ /* save pre-optimisation op count */
+ if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ tb->tb_stats->code.num_tcg_ops += s->nb_ops;
+ tb->tb_stats->code.temps += s->nb_temps;
+ }
+
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
&& qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@@ -6003,6 +6025,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
s->gen_insn_data =
tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
+ if (tb_stats_collection_enabled()) {
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ TCGOpcode opc = op->opc;
+ s->prof.table_op_count[opc]++;
+ }
+ }
+
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) {
TCGOpcode opc = op->opc;
@@ -6101,9 +6130,21 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
return tcg_current_code_size(s);
}
+void tcg_dump_op_count(GString *buf)
+{
+    TCGProfile prof = {}; /* zeroed, then filled by collect_tcg_profiler() */
+    int i;
+
+    collect_tcg_profiler(&prof);
+    for (i = 0; i < NB_OPS; i++) { /* one "<name> <count>" line per opcode */
+        g_string_append_printf(buf, "%s %zu\n", /* counts are size_t */
+                               tcg_op_defs[i].name, prof.table_op_count[i]);
+    }
+}
+
void tcg_dump_info(GString *buf)
{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+ dump_jit_profile_info(buf); /* appends nothing if TB stats are disabled */
}
#ifdef ELF_HOST_MACHINE