Message ID | 20230427230012.3800327-12-tsimpson@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Hexagon (target/hexagon) short-circuit and move to DisasContext | expand |
> -----Original Message----- > From: Taylor Simpson <tsimpson@quicinc.com> > Sent: Thursday, April 27, 2023 6:00 PM > To: qemu-devel@nongnu.org > Cc: Taylor Simpson <tsimpson@quicinc.com>; richard.henderson@linaro.org; > philmd@linaro.org; ale@rev.ng; anjo@rev.ng; Brian Cain > <bcain@quicinc.com>; Matheus Bernardino (QUIC) > <quic_mathbern@quicinc.com> > Subject: [PATCH v2 11/21] Hexagon (target/hexagon) Short-circuit packet > register writes > > In certain cases, we can avoid the overhead of writing to hex_new_value > and write directly to hex_gpr. We add need_commit field to DisasContext > indicating if the end-of-packet commit is needed. If it is not needed, > get_result_gpr() and get_result_gpr_pair() can return hex_gpr. > > We pass the ctx->need_commit to helpers when needed. > > Finally, we can early-exit from gen_reg_writes during packet commit. > > There are a few instructions whose semantics write to the result before > reading all the inputs. Therefore, the idef-parser generated code is > incompatible with short-circuit. We tell idef-parser to skip them. > > For debugging purposes, we add a cpu property to turn off short-circuit. > When the short-circuit property is false, we skip the analysis and force > the end-of-packet commit. > > Here's a simple example of the TCG generated for > 0x004000b4: 0x7800c020 { R0 = #0x1 } > > BEFORE: > ---- 004000b4 > movi_i32 new_r0,$0x1 > mov_i32 r0,new_r0 > > AFTER: > ---- 004000b4 > movi_i32 r0,$0x1 > > This patch reintroduces a use of check_for_attrib, so we remove the > G_GNUC_UNUSED added earlier in this series. > > Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/hexagon/cpu.h | 1 + > target/hexagon/gen_tcg.h | 3 +- > target/hexagon/genptr.h | 2 + > target/hexagon/helper.h | 2 +- > target/hexagon/macros.h | 13 ++++- > target/hexagon/translate.h | 2 + > target/hexagon/arch.c | 3 +- > target/hexagon/cpu.c | 5 +- > target/hexagon/genptr.c | 30 ++++------- > target/hexagon/op_helper.c | 5 +- > target/hexagon/translate.c | 67 ++++++++++++++++++++++++- > target/hexagon/gen_helper_funcs.py | 2 + > target/hexagon/gen_helper_protos.py | 10 +++- > target/hexagon/gen_idef_parser_funcs.py | 7 +++ > target/hexagon/gen_tcg_funcs.py | 5 ++ > target/hexagon/hex_common.py | 3 ++ > 16 files changed, 129 insertions(+), 31 deletions(-) > > diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h > index 81b663ecfb..9252055a38 100644 > --- a/target/hexagon/cpu.h > +++ b/target/hexagon/cpu.h > @@ -146,6 +146,7 @@ struct ArchCPU { > > bool lldb_compat; > target_ulong lldb_stack_adjust; > + bool short_circuit; > }; > > #include "cpu_bits.h" > diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h > index 2b2a6175a5..1f7e535300 100644 > --- a/target/hexagon/gen_tcg.h > +++ b/target/hexagon/gen_tcg.h > @@ -592,7 +592,8 @@ > #define fGEN_TCG_A5_ACS(SHORTCODE) \ > do { \ > gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ > - gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ > + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \ > + tcg_constant_tl(ctx->need_commit)); \ > } while (0) > > #define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ > diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h > index 75d0fc262d..420867f934 100644 > --- a/target/hexagon/genptr.h > +++ b/target/hexagon/genptr.h > @@ -58,4 +58,6 @@ void gen_set_half(int N, TCGv result, TCGv src); > void gen_set_half_i64(int N, TCGv_i64 result, TCGv src); > void probe_noshuf_load(TCGv va, int s, int mi); > > +extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS]; > + > #endif > diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h > index 73849e3d49..4b750d0351 100644 > --- a/target/hexagon/helper.h > +++ b/target/hexagon/helper.h > @@ -29,7 +29,7 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, > s32, s32, s32, s32, s32) > DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) > DEF_HELPER_3(sfrecipa, i64, env, f32, f32) > DEF_HELPER_2(sfinvsqrta, i64, env, f32) > -DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) > +DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32) > DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, > s64, s64) > DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, > s64) > DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, > s64) > diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h > index 16e72ed0d5..a68446a367 100644 > --- a/target/hexagon/macros.h > +++ b/target/hexagon/macros.h > @@ -44,8 +44,17 @@ > reg_field_info[FIELD].offset) > > #define SET_USR_FIELD(FIELD, VAL) \ > - fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, > \ > - reg_field_info[FIELD].offset, (VAL)) > + do { \ > + if (pkt_need_commit) { \ > + fINSERT_BITS(env->new_value[HEX_REG_USR], \ > + reg_field_info[FIELD].width, \ > + reg_field_info[FIELD].offset, (VAL)); \ > + } else { \ > + fINSERT_BITS(env->gpr[HEX_REG_USR], \ > + reg_field_info[FIELD].width, \ > + reg_field_info[FIELD].offset, (VAL)); \ > + } \ > + } while (0) > #endif > > #ifdef QEMU_GENERATE > diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h > index f72228859f..3f6fd3452c 100644 > --- a/target/hexagon/translate.h > +++ b/target/hexagon/translate.h > @@ -62,10 +62,12 @@ typedef struct DisasContext { > int qreg_log_idx; > DECLARE_BITMAP(qregs_read, NUM_QREGS); > bool pre_commit; > + bool need_commit; > TCGCond branch_cond; > target_ulong branch_dest; > bool is_tight_loop; > bool need_pkt_has_store_s1; > + bool short_circuit; > } DisasContext; > > static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) > diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c > index da79b41c4d..d053d68487 100644 > --- a/target/hexagon/arch.c > +++ b/target/hexagon/arch.c > @@ -1,5 +1,5 @@ > /* > - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights > Reserved. > + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights > Reserved. > * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > @@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env) > > void arch_fpop_end(CPUHexagonState *env) > { > + const bool pkt_need_commit = true; > int flags = get_float_exception_flags(&env->fp_status); > if (flags != 0) { > SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); > diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c > index ab40cfc283..4adf90dcfa 100644 > --- a/target/hexagon/cpu.c > +++ b/target/hexagon/cpu.c > @@ -1,5 +1,5 @@ > /* > - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights > Reserved. > + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights > Reserved. > * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > @@ -52,6 +52,8 @@ static Property hexagon_lldb_compat_property = > static Property hexagon_lldb_stack_adjust_property = > DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, > lldb_stack_adjust, > 0, qdev_prop_uint32, target_ulong); > +static Property hexagon_short_circuit_property = > + DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true); > > const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { > "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", > @@ -328,6 +330,7 @@ static void hexagon_cpu_init(Object *obj) > cpu_set_cpustate_pointers(cpu); > qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property); > qdev_property_add_static(DEVICE(obj), > &hexagon_lldb_stack_adjust_property); > + qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property); > } > > #include "hw/core/tcg-cpu-ops.h" > diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c > index aff9ffe37b..5a0f6b5195 100644 > --- a/target/hexagon/genptr.c > +++ b/target/hexagon/genptr.c > @@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) > > #define IMMUTABLE (~0) > > -static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { > +const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { > [HEX_REG_USR] = 0xc13000c0, > [HEX_REG_PC] = IMMUTABLE, > [HEX_REG_GP] = 0x3f, > @@ -70,14 +70,18 @@ static inline void gen_masked_reg_write(TCGv > new_val, TCGv cur_val, > > static TCGv get_result_gpr(DisasContext *ctx, int rnum) > { > - return hex_new_value[rnum]; > + if (ctx->need_commit) { > + return hex_new_value[rnum]; > + } else { > + return hex_gpr[rnum]; > + } > } > > static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) > { > TCGv_i64 result = tcg_temp_new_i64(); > - tcg_gen_concat_i32_i64(result, hex_new_value[rnum], > - hex_new_value[rnum + 1]); > + tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum), > + get_result_gpr(ctx, rnum + 1)); > return result; > } > > @@ -86,7 +90,7 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, > TCGv val) > const target_ulong reg_mask = reg_immut_masks[rnum]; > > gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); > - tcg_gen_mov_tl(hex_new_value[rnum], val); > + tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val); > if (HEX_DEBUG) { > /* Do this so HELPER(debug_commit_end) will know */ > tcg_gen_movi_tl(hex_reg_written[rnum], 1); > @@ -95,27 +99,15 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, > TCGv val) > > static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) > { > - const target_ulong reg_mask_low = reg_immut_masks[rnum]; > - const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; > TCGv val32 = tcg_temp_new(); > > /* Low word */ > tcg_gen_extrl_i64_i32(val32, val); > - gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); > - tcg_gen_mov_tl(hex_new_value[rnum], val32); > - if (HEX_DEBUG) { > - /* Do this so HELPER(debug_commit_end) will know */ > - tcg_gen_movi_tl(hex_reg_written[rnum], 1); > - } > + gen_log_reg_write(ctx, rnum, val32); > > /* High word */ > tcg_gen_extrh_i64_i32(val32, val); > - gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); > - tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); > - if (HEX_DEBUG) { > - /* Do this so HELPER(debug_commit_end) will know */ > - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); > - } > + gen_log_reg_write(ctx, rnum + 1, val32); > } > > void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) > diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c > index 46ccc59106..fc5c30a141 100644 > --- a/target/hexagon/op_helper.c > +++ b/target/hexagon/op_helper.c > @@ -220,7 +220,7 @@ void HELPER(debug_commit_end)(CPUHexagonState > *env, int has_st0, int has_st1) > reg_printed = true; > } > HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx > ")\n", > - i, env->new_value[i], env->new_value[i]); > + i, env->gpr[i], env->gpr[i]); > } > } > > @@ -352,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, > float32 RsV) > } > > int64_t HELPER(vacsh_val)(CPUHexagonState *env, > - int64_t RxxV, int64_t RssV, int64_t RttV) > + int64_t RxxV, int64_t RssV, int64_t RttV, > + uint32_t pkt_need_commit) > { > for (int i = 0; i < 4; i++) { > int xv = sextract64(RxxV, i * 16, 16); > diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c > index 023fc9be1e..5bd71bdcaf 100644 > --- a/target/hexagon/translate.c > +++ b/target/hexagon/translate.c > @@ -27,6 +27,7 @@ > #include "insn.h" > #include "decode.h" > #include "translate.h" > +#include "genptr.h" > #include "printinsn.h" > > #include "analyze_funcs_generated.c.inc" > @@ -239,7 +240,7 @@ static int read_packet_words(CPUHexagonState *env, > DisasContext *ctx, > return nwords; > } > > -static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib) > +static bool check_for_attrib(Packet *pkt, int attrib) > { > for (int i = 0; i < pkt->num_insns; i++) { > if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { > @@ -336,6 +337,58 @@ static void mark_implicit_pred_writes(DisasContext > *ctx) > mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); > } > > +static bool pkt_raises_exception(Packet *pkt) > +{ > + if (check_for_attrib(pkt, A_LOAD) || > + check_for_attrib(pkt, A_STORE)) { > + return true; > + } > + return false; > +} > + > +static bool need_commit(DisasContext *ctx) > +{ > + Packet *pkt = ctx->pkt; > + > + /* > + * If the short-circuit property is set to false, we'll always do the commit > + */ > + if (!ctx->short_circuit) { > + return true; > + } > + > + if (pkt_raises_exception(pkt)) { > + return true; > + } > + > + /* Registers with immutability flags require new_value */ > + for (int i = 0; i < ctx->reg_log_idx; i++) { > + int rnum = ctx->reg_log[i]; > + if (reg_immut_masks[rnum]) { > + return true; > + } > + } > + > + /* Floating point instructions are hard-coded to use new_value */ > + if (check_for_attrib(pkt, A_FPOP)) { > + return true; > + } > + > + if (pkt->num_insns == 1) { > + return false; > + } > + > + /* Check for overlap between register reads and writes */ > + for (int i = 0; i < ctx->reg_log_idx; i++) { > + int rnum = ctx->reg_log[i]; > + if (test_bit(rnum, ctx->regs_read)) { > + return true; > + } > + } > + > + return false; > +} > + > static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) > { > if (GET_ATTRIB(ctx->insn->opcode, attrib)) { > @@ -365,6 +418,8 @@ static void analyze_packet(DisasContext *ctx) > mark_implicit_pred_writes(ctx); > mark_implicit_pred_reads(ctx); > } > + > + ctx->need_commit = need_commit(ctx); > } > > static void gen_start_packet(DisasContext *ctx) > @@ -434,7 +489,8 @@ static void gen_start_packet(DisasContext *ctx) > } > > /* Preload the predicated registers into hex_new_value[i] */ > - if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { > + if (ctx->need_commit && > + !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { > int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); > while (i < TOTAL_PER_THREAD_REGS) { > tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); > @@ -541,6 +597,11 @@ static void gen_reg_writes(DisasContext *ctx) > { > int i; > > + /* Early exit if not needed */ > + if (!ctx->need_commit) { > + return; > + } > + > for (i = 0; i < ctx->reg_log_idx; i++) { > int reg_num = ctx->reg_log[i]; > > @@ -919,6 +980,7 @@ static void > hexagon_tr_init_disas_context(DisasContextBase *dcbase, > CPUState *cs) > { > DisasContext *ctx = container_of(dcbase, DisasContext, base); > + HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr); > uint32_t hex_flags = dcbase->tb->flags; > > ctx->mem_idx = MMU_USER_IDX; > @@ -927,6 +989,7 @@ static void > hexagon_tr_init_disas_context(DisasContextBase *dcbase, > ctx->num_hvx_insns = 0; > ctx->branch_cond = TCG_COND_NEVER; > ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); > + ctx->short_circuit = hex_cpu->short_circuit; > } > > static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) > diff --git a/target/hexagon/gen_helper_funcs.py > b/target/hexagon/gen_helper_funcs.py > index c73d792580..e259ea3d03 100755 > --- a/target/hexagon/gen_helper_funcs.py > +++ b/target/hexagon/gen_helper_funcs.py > @@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms): > > if hex_common.need_pkt_has_multi_cof(tag): > f.write(", uint32_t pkt_has_multi_cof") > + if (hex_common.need_pkt_need_commit(tag)): > + f.write(", uint32_t pkt_need_commit") > > if hex_common.need_PC(tag): > if i > 0: > diff --git a/target/hexagon/gen_helper_protos.py > b/target/hexagon/gen_helper_protos.py > index 187cd6e04e..c5ecb85294 100755 > --- a/target/hexagon/gen_helper_protos.py > +++ b/target/hexagon/gen_helper_protos.py > @@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): > def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1 > if hex_common.need_pkt_has_multi_cof(tag): > def_helper_size += 1 > + if hex_common.need_pkt_need_commit(tag): > + def_helper_size += 1 > if hex_common.need_part1(tag): > def_helper_size += 1 > if hex_common.need_slot(tag): > @@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): > def_helper_size = len(regs) + len(imms) + numscalarreadwrite > if hex_common.need_pkt_has_multi_cof(tag): > def_helper_size += 1 > + if hex_common.need_pkt_need_commit(tag): > + def_helper_size += 1 > if hex_common.need_part1(tag): > def_helper_size += 1 > if hex_common.need_slot(tag): > @@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): > for immlett, bits, immshift in imms: > f.write(", s32") > > - ## Add the arguments for the instruction pkt_has_multi_cof, slot and > - ## part1 (if needed) > + ## Add the arguments for the instruction pkt_has_multi_cof, > + ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed) > if hex_common.need_pkt_has_multi_cof(tag): > f.write(", i32") > + if hex_common.need_pkt_need_commit(tag): > + f.write(', i32') > if hex_common.need_PC(tag): > f.write(", i32") > if hex_common.helper_needs_next_PC(tag): > diff --git a/target/hexagon/gen_idef_parser_funcs.py > b/target/hexagon/gen_idef_parser_funcs.py > index afe68bdb6f..b7f2df0f36 100644 > --- a/target/hexagon/gen_idef_parser_funcs.py > +++ b/target/hexagon/gen_idef_parser_funcs.py > @@ -109,6 +109,13 @@ def main(): > continue > if "A_COF" in hex_common.attribdict[tag]: > continue > + ## Skip instructions that are incompatible with short-circuit > + ## packet register writes > + if ( tag == 'S2_insert' or > + tag == 'S2_insert_rp' or > + tag == 'S2_asr_r_svw_trun' or > + tag == 'A2_swiz' ): > + continue > > regs = tagregs[tag] > imms = tagimms[tag] > diff --git a/target/hexagon/gen_tcg_funcs.py > b/target/hexagon/gen_tcg_funcs.py > index d9ccbe63f6..0e45d43685 100755 > --- a/target/hexagon/gen_tcg_funcs.py > +++ b/target/hexagon/gen_tcg_funcs.py > @@ -550,6 +550,9 @@ def gen_tcg_func(f, tag, regs, imms): > if hex_common.need_pkt_has_multi_cof(tag): > f.write(" TCGv pkt_has_multi_cof = ") > f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n") > + if hex_common.need_pkt_need_commit(tag): > + f.write(" TCGv pkt_need_commit = ") > + f.write("tcg_constant_tl(ctx->need_commit);\n") > if hex_common.need_part1(tag): > f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") > if hex_common.need_slot(tag): > @@ -596,6 +599,8 @@ def gen_tcg_func(f, tag, regs, imms): > > if hex_common.need_pkt_has_multi_cof(tag): > f.write(", pkt_has_multi_cof") > + if hex_common.need_pkt_need_commit(tag): > + f.write(", pkt_need_commit") > if hex_common.need_PC(tag): > f.write(", PC") > if hex_common.helper_needs_next_PC(tag): > diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py > index 232c6e2c20..29c0508f66 100755 > --- a/target/hexagon/hex_common.py > +++ b/target/hexagon/hex_common.py > @@ -276,6 +276,9 @@ def need_pkt_has_multi_cof(tag): > return "A_COF" in attribdict[tag] > > > +def need_pkt_need_commit(tag): > + return 'A_IMPLICIT_WRITES_USR' in attribdict[tag] > + > def need_condexec_reg(tag, regs): > if "A_CONDEXEC" in attribdict[tag]: > for regtype, regid, toss, numregs in regs: > -- > 2.25.1 Reviewed-by: Brian Cain <bcain@quicinc.com>
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 81b663ecfb..9252055a38 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -146,6 +146,7 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; + bool short_circuit; }; #include "cpu_bits.h" diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 2b2a6175a5..1f7e535300 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -592,7 +592,8 @@ #define fGEN_TCG_A5_ACS(SHORTCODE) \ do { \ gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ - gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \ + tcg_constant_tl(ctx->need_commit)); \ } while (0) #define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 75d0fc262d..420867f934 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -58,4 +58,6 @@ void gen_set_half(int N, TCGv result, TCGv src); void gen_set_half_i64(int N, TCGv_i64 result, TCGv src); void probe_noshuf_load(TCGv va, int s, int mi); +extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS]; + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 73849e3d49..4b750d0351 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -29,7 +29,7 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) -DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) +DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32) DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 16e72ed0d5..a68446a367 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -44,8 +44,17 @@ reg_field_info[FIELD].offset) #define SET_USR_FIELD(FIELD, VAL) \ - fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \ - reg_field_info[FIELD].offset, (VAL)) + do { \ + if (pkt_need_commit) { \ + fINSERT_BITS(env->new_value[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } else { \ + fINSERT_BITS(env->gpr[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } \ + } while (0) #endif #ifdef QEMU_GENERATE diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index f72228859f..3f6fd3452c 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -62,10 +62,12 @@ typedef struct DisasContext { int qreg_log_idx; DECLARE_BITMAP(qregs_read, NUM_QREGS); bool pre_commit; + bool need_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; bool need_pkt_has_store_s1; + bool short_circuit; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index da79b41c4d..d053d68487 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env) void arch_fpop_end(CPUHexagonState *env) { + const bool pkt_need_commit = true; int flags = get_float_exception_flags(&env->fp_status); if (flags != 0) { SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index ab40cfc283..4adf90dcfa 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,6 +52,8 @@ static Property hexagon_lldb_compat_property = static Property hexagon_lldb_stack_adjust_property = DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong); +static Property hexagon_short_circuit_property = + DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true); const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -328,6 +330,7 @@ static void hexagon_cpu_init(Object *obj) cpu_set_cpustate_pointers(cpu); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property); + qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property); } #include "hw/core/tcg-cpu-ops.h" diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index aff9ffe37b..5a0f6b5195 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) #define IMMUTABLE (~0) -static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { +const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { [HEX_REG_USR] = 0xc13000c0, [HEX_REG_PC] = IMMUTABLE, [HEX_REG_GP] = 0x3f, @@ -70,14 +70,18 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, static TCGv get_result_gpr(DisasContext *ctx, int rnum) { - return hex_new_value[rnum]; + if (ctx->need_commit) { + return hex_new_value[rnum]; + } else { + return hex_gpr[rnum]; + } } static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) { TCGv_i64 result = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(result, hex_new_value[rnum], - hex_new_value[rnum + 1]); + tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum), + get_result_gpr(ctx, rnum + 1)); return result; } @@ -86,7 +90,7 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) const target_ulong reg_mask = reg_immut_masks[rnum]; gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); - tcg_gen_mov_tl(hex_new_value[rnum], val); + tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum], 1); @@ -95,27 +99,15 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) { - const target_ulong reg_mask_low = reg_immut_masks[rnum]; - const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; TCGv val32 = tcg_temp_new(); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); - tcg_gen_mov_tl(hex_new_value[rnum], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); - } + gen_log_reg_write(ctx, rnum, val32); /* High word */ tcg_gen_extrh_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); - tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); - } + gen_log_reg_write(ctx, rnum + 1, val32); } void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 46ccc59106..fc5c30a141 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -220,7 +220,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) reg_printed = true; } HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", - i, env->new_value[i], env->new_value[i]); + i, env->gpr[i], env->gpr[i]); } } @@ -352,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) } int64_t HELPER(vacsh_val)(CPUHexagonState *env, - int64_t RxxV, int64_t RssV, int64_t RttV) + int64_t RxxV, int64_t RssV, int64_t RttV, + uint32_t pkt_need_commit) { for (int i = 0; i < 4; i++) { int xv = sextract64(RxxV, i * 16, 16); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 023fc9be1e..5bd71bdcaf 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -27,6 +27,7 @@ #include "insn.h" #include "decode.h" #include "translate.h" +#include "genptr.h" #include "printinsn.h" #include "analyze_funcs_generated.c.inc" @@ -239,7 +240,7 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, return nwords; } -static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib) +static bool check_for_attrib(Packet *pkt, int attrib) { for (int i = 0; i < pkt->num_insns; i++) { if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { @@ -336,6 +337,58 @@ static void mark_implicit_pred_writes(DisasContext *ctx) mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); } +static bool pkt_raises_exception(Packet *pkt) +{ + if (check_for_attrib(pkt, A_LOAD) || + check_for_attrib(pkt, A_STORE)) { + return true; + } + return false; +} + +static bool need_commit(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + + /* + * If the short-circuit property is set to false, we'll always do the commit + */ + if (!ctx->short_circuit) { + return true; + } + + if (pkt_raises_exception(pkt)) { + return true; + } + + /* Registers with immutability flags require new_value */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (reg_immut_masks[rnum]) { + return true; + } + } + + /* Floating point instructions are hard-coded to use new_value */ + if (check_for_attrib(pkt, A_FPOP)) { + return true; + } + + if (pkt->num_insns == 1) { + return false; + } + + /* Check for overlap between register reads and writes */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (test_bit(rnum, ctx->regs_read)) { + return true; + } + } + + return false; +} + static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) { if (GET_ATTRIB(ctx->insn->opcode, attrib)) { @@ -365,6 +418,8 @@ static void analyze_packet(DisasContext *ctx) mark_implicit_pred_writes(ctx); mark_implicit_pred_reads(ctx); } + + ctx->need_commit = need_commit(ctx); } static void gen_start_packet(DisasContext *ctx) @@ -434,7 +489,8 @@ static void gen_start_packet(DisasContext *ctx) } /* Preload the predicated registers into hex_new_value[i] */ - if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { + if (ctx->need_commit && + !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); while (i < TOTAL_PER_THREAD_REGS) { tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); @@ -541,6 +597,11 @@ static void gen_reg_writes(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + return; + } + for (i = 0; i < ctx->reg_log_idx; i++) { int reg_num = ctx->reg_log[i]; @@ -919,6 +980,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr); uint32_t hex_flags = dcbase->tb->flags; ctx->mem_idx = MMU_USER_IDX; @@ -927,6 +989,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); + ctx->short_circuit = hex_cpu->short_circuit; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index c73d792580..e259ea3d03 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", uint32_t pkt_has_multi_cof") + if (hex_common.need_pkt_need_commit(tag)): + f.write(", uint32_t pkt_need_commit") if hex_common.need_PC(tag): if i > 0: diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 187cd6e04e..c5ecb85294 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): for immlett, bits, immshift in imms: f.write(", s32") - ## Add the arguments for the instruction pkt_has_multi_cof, slot and - ## part1 (if needed) + ## Add the arguments for the instruction pkt_has_multi_cof, + ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed) if hex_common.need_pkt_has_multi_cof(tag): f.write(", i32") + if hex_common.need_pkt_need_commit(tag): + f.write(', i32') if hex_common.need_PC(tag): f.write(", i32") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index afe68bdb6f..b7f2df0f36 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -109,6 +109,13 @@ def main(): continue if "A_COF" in hex_common.attribdict[tag]: continue + ## Skip instructions that are incompatible with short-circuit + ## packet register writes + if ( tag == 'S2_insert' or + tag == 'S2_insert_rp' or + tag == 'S2_asr_r_svw_trun' or + tag == 'A2_swiz' ): + continue regs = tagregs[tag] imms = tagimms[tag] diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index d9ccbe63f6..0e45d43685 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -550,6 +550,9 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(" TCGv pkt_has_multi_cof = ") f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n") + if hex_common.need_pkt_need_commit(tag): + f.write(" TCGv pkt_need_commit = ") + f.write("tcg_constant_tl(ctx->need_commit);\n") if hex_common.need_part1(tag): f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") if hex_common.need_slot(tag): @@ -596,6 +599,8 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", pkt_has_multi_cof") + if hex_common.need_pkt_need_commit(tag): + f.write(", pkt_need_commit") if hex_common.need_PC(tag): f.write(", PC") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 232c6e2c20..29c0508f66 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -276,6 +276,9 @@ def need_pkt_has_multi_cof(tag): return "A_COF" in attribdict[tag] +def need_pkt_need_commit(tag): + return 'A_IMPLICIT_WRITES_USR' in attribdict[tag] + def need_condexec_reg(tag, regs): if "A_CONDEXEC" in attribdict[tag]: for regtype, regid, toss, numregs in regs: