@@ -14,6 +14,7 @@
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
#include "internal.h"
@@ -55,6 +56,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
+ if (!tb_stats_collection_enabled()) {
+ error_report("TB information not being recorded.");
+ return NULL;
+ }
+
if (!tcg_enabled()) {
error_setg(errp,
"Opcode count information is only available with accel=tcg");
@@ -66,34 +72,15 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
return human_readable_text_from_str(buf);
}
-#ifdef CONFIG_PROFILER
-
HumanReadableText *qmp_x_query_profile(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
- static int64_t last_cpu_exec_time;
- int64_t cpu_exec_time;
- int64_t delta;
-
- cpu_exec_time = tcg_cpu_exec_time();
- delta = cpu_exec_time - last_cpu_exec_time;
-
- g_string_append_printf(buf, "async time %" PRId64 " (%0.3f)\n",
- dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
- g_string_append_printf(buf, "qemu time %" PRId64 " (%0.3f)\n",
- delta, delta / (double)NANOSECONDS_PER_SECOND);
- last_cpu_exec_time = cpu_exec_time;
+
+ dump_jit_exec_time_info(dev_time);
dev_time = 0;
return human_readable_text_from_str(buf);
}
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
- error_setg(errp, "Internal profiler not compiled");
- return NULL;
-}
-#endif
static void hmp_tcg_register(void)
{
@@ -13,6 +13,7 @@
#include "tcg/tcg.h"
#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
#include "exec/tb-stats.h"
#include "tb-context.h"
@@ -41,6 +42,13 @@ struct jit_profile_info {
uint64_t host;
uint64_t guest;
uint64_t search_data;
+
+ uint64_t interm_time;
+ uint64_t code_time;
+ uint64_t restore_count;
+ uint64_t restore_time;
+ uint64_t opt_time;
+ uint64_t la_time;
};
/* accumulate the statistics from all TBs */
@@ -62,6 +70,35 @@ static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
jpi->host += tbs->code.out_len;
jpi->guest += tbs->code.in_len;
jpi->search_data += tbs->code.search_out_len;
+
+ jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+ jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+ jpi->la_time += stat_per_translation(tbs, gen_times.la);
+ jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+ /*
+ * The restore time covers how long we have spent restoring state
+ * from a given TB (e.g. recovering from a fault). It is therefor
+ * not related to the number of translations we have done.
+ */
+ jpi->restore_time += tbs->tb_restore_time;
+ jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time)
+{
+ static uint64_t last_cpu_exec_time;
+ uint64_t cpu_exec_time;
+ uint64_t delta;
+
+ cpu_exec_time = tcg_cpu_exec_time();
+ delta = cpu_exec_time - last_cpu_exec_time;
+
+ qemu_printf("async time %" PRId64 " (%0.3f)\n",
+ dev_time, dev_time / (double) NANOSECONDS_PER_SECOND);
+ qemu_printf("qemu time %" PRId64 " (%0.3f)\n",
+ delta, delta / (double) NANOSECONDS_PER_SECOND);
+ last_cpu_exec_time = cpu_exec_time;
}
/* dump JIT statisticis using TCGProfile and TBStats */
@@ -88,6 +125,47 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
jpi->host / (double) jpi->translations);
g_string_append_printf(buf, "avg search data/TB %0.1f\n",
jpi->search_data / (double) jpi->translations);
+
+ uint64_t tot = jpi->interm_time + jpi->code_time;
+
+ g_string_append_printf(buf, "JIT cycles %" PRId64
+ " (%0.3fs at 2.4 GHz)\n",
+ tot, tot / 2.4e9);
+ g_string_append_printf(buf, " cycles/op %0.1f\n",
+ jpi->ops ? (double)tot / jpi->ops : 0);
+ g_string_append_printf(buf, " cycles/in byte %0.1f\n",
+ jpi->guest ? (double)tot / jpi->guest : 0);
+ g_string_append_printf(buf, " cycles/out byte %0.1f\n",
+ jpi->host ? (double)tot / jpi->host : 0);
+ g_string_append_printf(buf, " cycles/search byte %0.1f\n",
+ jpi->search_data ? (double)tot / jpi->search_data : 0);
+ if (tot == 0) {
+ tot = 1;
+ }
+
+ g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
+ (double)jpi->interm_time / tot * 100.0);
+ g_string_append_printf(buf, " gen_code time %0.1f%%\n",
+ (double)jpi->code_time / tot * 100.0);
+
+ g_string_append_printf(buf, " optim./code time %0.1f%%\n",
+ (double)jpi->opt_time / (jpi->code_time ? jpi->code_time : 1)
+ * 100.0);
+ g_string_append_printf(buf, " liveness/code time %0.1f%%\n",
+ (double)jpi->la_time / (jpi->code_time ? jpi->code_time : 1)
+ * 100.0);
+
+ g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
+ jpi->restore_count);
+ g_string_append_printf(buf, " avg cycles %0.1f\n",
+ jpi->restore_count ?
+ (double)jpi->restore_time / jpi->restore_count : 0);
+
+ if (s) {
+ g_string_append_printf(buf, "cpu exec time %" PRId64 " (%0.3fs)\n",
+ s->cpu_exec_time,
+ s->cpu_exec_time / (double) NANOSECONDS_PER_SECOND);
+ }
}
g_free(jpi);
}
@@ -70,20 +70,17 @@ void tcg_cpus_destroy(CPUState *cpu)
int tcg_cpus_exec(CPUState *cpu)
{
int ret;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
+ uint64_t ti;
+
assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
ti = profile_getclock();
-#endif
+
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
- qatomic_set(&tcg_ctx->prof.cpu_exec_time,
- tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
+
+ qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);
+
return ret;
}
@@ -564,6 +564,9 @@ typedef struct TCGProfile {
uint64_t gen_opt_done_time;
uint64_t gen_la_done_time;
uint64_t gen_code_done_time;
+
+ /* Lifetime count of TCGOps per TCGContext */
+ uint64_t table_op_count[NB_OPS];
} TCGProfile;
struct TCGContext {
@@ -925,7 +928,7 @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
return temp_tcgv_ptr(t);
}
-int64_t tcg_cpu_exec_time(void);
+uint64_t tcg_cpu_exec_time(void);
void tcg_dump_info(GString *buf);
void tcg_dump_op_count(GString *buf);
@@ -720,18 +720,12 @@ static bool main_loop_should_exit(int *status)
int qemu_main_loop(void)
{
int status = EXIT_SUCCESS;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
+ uint64_t ti;
while (!main_loop_should_exit(&status)) {
-#ifdef CONFIG_PROFILER
ti = profile_getclock();
-#endif
main_loop_wait(false);
-#ifdef CONFIG_PROFILER
dev_time += profile_getclock() - ti;
-#endif
}
return status;
@@ -4816,25 +4816,13 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
}
}
-#ifdef CONFIG_PROFILER
-
/* avoid copy/paste errors */
#define PROF_ADD(to, from, field) \
do { \
(to)->field += qatomic_read(&((from)->field)); \
} while (0)
-#define PROF_MAX(to, from, field) \
- do { \
- typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
- if (val__ > (to)->field) { \
- (to)->field = val__; \
- } \
- } while (0)
-
-/* Pass in a zero'ed @prof */
-static inline
-void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
+static void collect_tcg_profiler(TCGProfile *prof)
{
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
unsigned int i;
@@ -4843,55 +4831,19 @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
TCGContext *s = qatomic_read(&tcg_ctxs[i]);
const TCGProfile *orig = &s->prof;
- if (counters) {
- PROF_ADD(prof, orig, cpu_exec_time);
- PROF_ADD(prof, orig, interm_time);
- PROF_ADD(prof, orig, code_time);
- PROF_ADD(prof, orig, la_time);
- PROF_ADD(prof, orig, opt_time);
- PROF_ADD(prof, orig, restore_count);
- PROF_ADD(prof, orig, restore_time);
- }
- if (table) {
- int i;
+ PROF_ADD(prof, orig, cpu_exec_time);
- for (i = 0; i < NB_OPS; i++) {
- PROF_ADD(prof, orig, table_op_count[i]);
- }
+ for (i = 0; i < NB_OPS; i++) {
+ PROF_ADD(prof, orig, table_op_count[i]);
}
}
}
-#undef PROF_ADD
-#undef PROF_MAX
-
-static void tcg_profile_snapshot_counters(TCGProfile *prof)
-{
- tcg_profile_snapshot(prof, true, false);
-}
-
-static void tcg_profile_snapshot_table(TCGProfile *prof)
-{
- tcg_profile_snapshot(prof, false, true);
-}
-
-void tcg_dump_op_count(GString *buf)
-{
- TCGProfile prof = {};
- int i;
-
- tcg_profile_snapshot_table(&prof);
- for (i = 0; i < NB_OPS; i++) {
- g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
- prof.table_op_count[i]);
- }
-}
-
-int64_t tcg_cpu_exec_time(void)
+uint64_t tcg_cpu_exec_time(void)
{
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
unsigned int i;
- int64_t ret = 0;
+ uint64_t ret = 0;
for (i = 0; i < n_ctxs; i++) {
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
@@ -4901,19 +4853,6 @@ int64_t tcg_cpu_exec_time(void)
}
return ret;
}
-#else
-void tcg_dump_op_count(GString *buf)
-{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
-}
-
-int64_t tcg_cpu_exec_time(void)
-{
- error_report("%s: TCG profiler not compiled", __func__);
- exit(EXIT_FAILURE);
-}
-#endif
-
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
@@ -5029,14 +4968,17 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
s->pool_labels = NULL;
#endif
+ if (tb_stats_collection_enabled()) {
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ TCGOpcode opc = op->opc;
+ s->prof.table_op_count[opc]++;
+ }
+ }
+
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) {
TCGOpcode opc = op->opc;
-#ifdef CONFIG_PROFILER
- qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
-#endif
-
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
@@ -5136,14 +5078,24 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
return tcg_current_code_size(s);
}
+void tcg_dump_op_count(GString *buf)
+{
+ TCGProfile prof = {};
+ int i;
+
+ collect_tcg_profiler(&prof);
+ for (i = 0; i < NB_OPS; i++) {
+ g_string_append_printf(buf, "%s %" PRId64 "\n",
+ tcg_op_defs[i].name, prof.table_op_count[i]);
+ }
+}
+
void tcg_dump_info(GString *buf)
{
TCGProfile *s = NULL;
-#ifdef CONFIG_PROFILER
TCGProfile prof = {};
- tcg_profile_snapshot_counters(&prof);
s = &prof;
-#endif
+ collect_tcg_profiler(s);
dump_jit_profile_info(s, buf);
}