@@ -1,6 +1,8 @@
DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_2(cross_page_check, i32, env, tl)
+
DEF_HELPER_3(write_eflags, void, env, tl, i32)
DEF_HELPER_1(read_eflags, tl, env)
DEF_HELPER_2(divb_AL, void, env, tl)
@@ -637,3 +637,8 @@ void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val)
env->pkru = val;
tlb_flush(cs);
}
+
+uint32_t helper_cross_page_check(CPUX86State *env, target_ulong vaddr)
+{
+    return tb_from_jmp_cache(env, vaddr) ? 1 : 0; /* non-zero result -> 1 */
+}
@@ -2153,7 +2153,19 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
gen_jmp_im(eip);
tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
} else {
- /* jump to another page: currently not optimized */
+ /* jump to another page */
+ TCGv vaddr = tcg_const_tl(eip);
+ TCGv_i32 valid = tcg_temp_new_i32();
+ TCGLabel *label = gen_new_label();
+
+ gen_helper_cross_page_check(valid, cpu_env, vaddr);
+ tcg_temp_free(vaddr);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, valid, 0, label);
+ tcg_temp_free_i32(valid);
+ tcg_gen_goto_tb(tb_num);
+ gen_jmp_im(eip);
+ tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
+ gen_set_label(label);
gen_jmp_im(eip);
gen_eob(s);
}
Instead of unconditionally exiting to the exec loop, add a helper to
check whether the target TB is valid. As long as the hit rate in
tb_jmp_cache remains high, this improves performance.

Measurements:

- specINT 2006 (test set), x86_64-softmmu. Host: Intel i7-4790K @ 4.00GHz
  Y axis: Speedup over 95b31d70

   1.3x+-+-------------------------------------------------------------+-+
       | cross $$ |
  1.25x+-+.............................................................+-+
       | |
   1.2x+-+.............................................................+-+
       | : |
       | : |
  1.15x+-+.............................................................+-+
       | $$$$ $$$$ +++ : |
   1.1x+-+.........$..$.$..$...........................................+-+
       | $ $ $ $ $$$ $$$$ |
  1.05x+-+.........$..$.$..$.....................$.$.$$$$......$..$....+-+
       | $ $ $ $ +++ +++ +++ $+$ $++$ +++ $: $ $$$$ |
       | +++ $ $ $ $ +++ $$$ : : $ $ $ $ $$$$ $: $ $++$ |
     1x+-$$$$G$$$$_$EM$_$ro$s$$$..$.$.......$$$..$.$.$..$.$..$.$..$.$..$-+
       | $++$ $ :$ $ $ $ $ $ $ $ $ : $+$ $ $ $ $ $++$ $: $ $ $ |
  0.95x+-$..$.$..$.$..$.$..$.$.$..$.$..$$$..$.$..$.$.$..$.$..$.$..$.$..$-+
       | $ $ $ $ $ $ $ $ $ $ $ $ $:$ $ $ $ $ $ $ $ $ $ $ $ $ |
   0.9x+-$$$$-$$$$-$$$$-$$$$-$$$--$$$--$$$--$$$--$$$-$$$$-$$$$-$$$$-$$$$-+
       astarbzip2gcc gobmh264rehmlibquantumcfomneperlbensjxalancbhmean
  png: http://imgur.com/cwRnmCi

  That is, a hmean gain of 2.6%.

- specINT 2006 (train set), x86_64-softmmu.
  Host: Intel i7-4790K @ 4.00GHz
  Y axis: Speedup over 95b31d70

  1.25x+-+-------------------------------------------------------------+-+
       | cross $$ |
       | |
   1.2x+-+.............................................................+-+
       | : +++ |
  1.15x+-+.............................................................+-+
       | : $$$ $$$$ $$$$ |
       | $$$$ +++ $:$ $++$ +++ $: $ |
   1.1x+-+.........$..$.$$$$.....................$.$.$..$......$..$....+-+
       | +++ $++$ $++$ +++ : $ $ $ $ : $++$ +++ |
  1.05x+-+....$$$$.$..$.$..$......$$$............$.$.$..$.$$$$.$..$.$$$$-+
       | $++$ $ $ $ $ $$$ $:$ $ $ $ $ $ :$ $ $ $ $ |
       | $ $ $ $ $ $ $:$ $+$ +++ +++ $ $ $ $ $ :$ $ $ $ $ |
     1x+-$$$$G$AP$_$EM$_$ro$s$i$li$e$..$$$.......$.$.$..$.$..$.$..$.$..$-+
       | $++$ $ $ $ $ $ $ $+$ $ $ $:$ $$$ $ $ $ $ $ $ $ $ $ $ |
  0.95x+-$..$.$..$.$..$.$..$.$.$..$.$..$.$..$.$..$.$.$..$.$..$.$..$.$..$-+
       | $ $ $ $ $ $ $ $ $ $ $ $ $ $ $+$ $ $ $ $ $ $ $ $ $ $ |
       | $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ |
   0.9x+-$$$$-$$$$-$$$$-$$$$-$$$--$$$--$$$--$$$--$$$-$$$$-$$$$-$$$$-$$$$-+
       astarbzip2gcc gobmh264rehmlibquantumcfomneperlbensjxalancbhmean
  png: http://imgur.com/0CbG7dD

  This is the larger "train" set. We get a hmean improvement of 6.1%.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/helper.h      |  2 ++
 target/i386/misc_helper.c |  5 +++++
 target/i386/translate.c   | 14 +++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)