diff mbox series

[11/21] Hexagon (target/hexagon) Short-circuit packet register writes

Message ID 20230426004234.1319401-2-tsimpson@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (target/hexagon) short-circuit and move to DisasContext | expand

Commit Message

Taylor Simpson April 26, 2023, 12:42 a.m. UTC
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr.  We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed.  If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.

We pass the ctx->need_commit to helpers when needed.

Finally, we can early-exit from gen_reg_writes during packet commit.

There are a few instructions whose semantics write to the result before
reading all the inputs.  Therefore, the idef-parser generated code is
incompatible with short-circuit.  We tell idef-parser to skip them.

For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.

Here's a simple example of the TCG generated for
0x004000b4:  0x7800c020 {       R0 = #0x1 }

BEFORE:
 ---- 004000b4
 movi_i32 new_r0,$0x1
 mov_i32 r0,new_r0

AFTER:
 ---- 004000b4
 movi_i32 r0,$0x1

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h                    |  1 +
 target/hexagon/gen_tcg.h                |  3 +-
 target/hexagon/genptr.h                 |  2 +
 target/hexagon/helper.h                 |  2 +-
 target/hexagon/macros.h                 | 13 ++++-
 target/hexagon/translate.h              |  2 +
 target/hexagon/arch.c                   |  3 +-
 target/hexagon/cpu.c                    |  5 +-
 target/hexagon/genptr.c                 | 30 +++++-------
 target/hexagon/op_helper.c              |  5 +-
 target/hexagon/translate.c              | 65 ++++++++++++++++++++++++-
 target/hexagon/gen_helper_funcs.py      |  2 +
 target/hexagon/gen_helper_protos.py     | 10 +++-
 target/hexagon/gen_idef_parser_funcs.py |  7 +++
 target/hexagon/gen_tcg_funcs.py         |  5 ++
 target/hexagon/hex_common.py            |  3 ++
 16 files changed, 128 insertions(+), 30 deletions(-)

Comments

Richard Henderson April 27, 2023, 10:40 a.m. UTC | #1
On 4/26/23 01:42, Taylor Simpson wrote:
> In certain cases, we can avoid the overhead of writing to hex_new_value
> and write directly to hex_gpr.  We add need_commit field to DisasContext
> indicating if the end-of-packet commit is needed.  If it is not needed,
> get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
> 
> We pass the ctx->need_commit to helpers when needed.
> 
> Finally, we can early-exit from gen_reg_writes during packet commit.
> 
> There are a few instructions whose semantics write to the result before
> reading all the inputs.  Therefore, the idef-parser generated code is
> incompatible with short-circuit.  We tell idef-parser to skip them.
> 
> For debugging purposes, we add a cpu property to turn off short-circuit.
> When the short-circuit property is false, we skip the analysis and force
> the end-of-packet commit.
> 
> Here's a simple example of the TCG generated for
> 0x004000b4:  0x7800c020 {       R0 = #0x1 }
> 
> BEFORE:
>   ---- 004000b4
>   movi_i32 new_r0,$0x1
>   mov_i32 r0,new_r0
> 
> AFTER:
>   ---- 004000b4
>   movi_i32 r0,$0x1
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
diff mbox series

Patch

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 81b663ecfb..9252055a38 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -146,6 +146,7 @@  struct ArchCPU {
 
     bool lldb_compat;
     target_ulong lldb_stack_adjust;
+    bool short_circuit;
 };
 
 #include "cpu_bits.h"
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index dbb64b9259..ec388629b3 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -592,7 +592,8 @@ 
 #define fGEN_TCG_A5_ACS(SHORTCODE) \
     do { \
         gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \
-        gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \
+        gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \
+                             tcg_constant_tl(ctx->need_commit)); \
     } while (0)
 
 #define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \
diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h
index 75d0fc262d..420867f934 100644
--- a/target/hexagon/genptr.h
+++ b/target/hexagon/genptr.h
@@ -58,4 +58,6 @@  void gen_set_half(int N, TCGv result, TCGv src);
 void gen_set_half_i64(int N, TCGv_i64 result, TCGv src);
 void probe_noshuf_load(TCGv va, int s, int mi);
 
+extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS];
+
 #endif
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index 73849e3d49..4b750d0351 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -29,7 +29,7 @@  DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
 DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
 DEF_HELPER_2(sfinvsqrta, i64, env, f32)
-DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64)
+DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32)
 DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64)
 DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64)
 DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64)
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 16e72ed0d5..a68446a367 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -44,8 +44,17 @@ 
                    reg_field_info[FIELD].offset)
 
 #define SET_USR_FIELD(FIELD, VAL) \
-    fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \
-                 reg_field_info[FIELD].offset, (VAL))
+    do { \
+        if (pkt_need_commit) { \
+            fINSERT_BITS(env->new_value[HEX_REG_USR], \
+                        reg_field_info[FIELD].width, \
+                        reg_field_info[FIELD].offset, (VAL)); \
+        } else { \
+            fINSERT_BITS(env->gpr[HEX_REG_USR], \
+                        reg_field_info[FIELD].width, \
+                        reg_field_info[FIELD].offset, (VAL)); \
+        } \
+    } while (0)
 #endif
 
 #ifdef QEMU_GENERATE
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index f72228859f..3f6fd3452c 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -62,10 +62,12 @@  typedef struct DisasContext {
     int qreg_log_idx;
     DECLARE_BITMAP(qregs_read, NUM_QREGS);
     bool pre_commit;
+    bool need_commit;
     TCGCond branch_cond;
     target_ulong branch_dest;
     bool is_tight_loop;
     bool need_pkt_has_store_s1;
+    bool short_circuit;
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
index da79b41c4d..d053d68487 100644
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@@ -1,5 +1,5 @@ 
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -224,6 +224,7 @@  void arch_fpop_start(CPUHexagonState *env)
 
 void arch_fpop_end(CPUHexagonState *env)
 {
+    const bool pkt_need_commit = true;
     int flags = get_float_exception_flags(&env->fp_status);
     if (flags != 0) {
         SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index ab40cfc283..4adf90dcfa 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -1,5 +1,5 @@ 
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -52,6 +52,8 @@  static Property hexagon_lldb_compat_property =
 static Property hexagon_lldb_stack_adjust_property =
     DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust,
                          0, qdev_prop_uint32, target_ulong);
+static Property hexagon_short_circuit_property =
+    DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true);
 
 const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
    "r0", "r1",  "r2",  "r3",  "r4",   "r5",  "r6",  "r7",
@@ -328,6 +330,7 @@  static void hexagon_cpu_init(Object *obj)
     cpu_set_cpustate_pointers(cpu);
     qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property);
     qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property);
+    qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property);
 }
 
 #include "hw/core/tcg-cpu-ops.h"
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index e4c116edc0..9431beebc3 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -45,7 +45,7 @@  TCGv gen_read_preg(TCGv pred, uint8_t num)
 
 #define IMMUTABLE (~0)
 
-static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
+const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
     [HEX_REG_USR] = 0xc13000c0,
     [HEX_REG_PC] = IMMUTABLE,
     [HEX_REG_GP] = 0x3f,
@@ -70,14 +70,18 @@  static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
 
 static TCGv get_result_gpr(DisasContext *ctx, int rnum)
 {
-    return hex_new_value[rnum];
+    if (ctx->need_commit) {
+        return hex_new_value[rnum];
+    } else {
+        return hex_gpr[rnum];
+    }
 }
 
 static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
 {
     TCGv_i64 result = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
-                                   hex_new_value[rnum + 1]);
+    tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum),
+                                   get_result_gpr(ctx, rnum + 1));
     return result;
 }
 
@@ -86,7 +90,7 @@  void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
     const target_ulong reg_mask = reg_immut_masks[rnum];
 
     gen_masked_reg_write(val, hex_gpr[rnum], reg_mask);
-    tcg_gen_mov_tl(hex_new_value[rnum], val);
+    tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val);
     if (HEX_DEBUG) {
         /* Do this so HELPER(debug_commit_end) will know */
         tcg_gen_movi_tl(hex_reg_written[rnum], 1);
@@ -95,27 +99,15 @@  void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
 
 static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
 {
-    const target_ulong reg_mask_low = reg_immut_masks[rnum];
-    const target_ulong reg_mask_high = reg_immut_masks[rnum + 1];
     TCGv val32 = tcg_temp_new();
 
     /* Low word */
     tcg_gen_extrl_i64_i32(val32, val);
-    gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low);
-    tcg_gen_mov_tl(hex_new_value[rnum], val32);
-    if (HEX_DEBUG) {
-        /* Do this so HELPER(debug_commit_end) will know */
-        tcg_gen_movi_tl(hex_reg_written[rnum], 1);
-    }
+    gen_log_reg_write(ctx, rnum, val32);
 
     /* High word */
     tcg_gen_extrh_i64_i32(val32, val);
-    gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high);
-    tcg_gen_mov_tl(hex_new_value[rnum + 1], val32);
-    if (HEX_DEBUG) {
-        /* Do this so HELPER(debug_commit_end) will know */
-        tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1);
-    }
+    gen_log_reg_write(ctx, rnum + 1, val32);
 }
 
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 46ccc59106..fc5c30a141 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -220,7 +220,7 @@  void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
                 reg_printed = true;
             }
             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
-                          i, env->new_value[i], env->new_value[i]);
+                          i, env->gpr[i], env->gpr[i]);
         }
     }
 
@@ -352,7 +352,8 @@  uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
 }
 
 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
-                           int64_t RxxV, int64_t RssV, int64_t RttV)
+                           int64_t RxxV, int64_t RssV, int64_t RttV,
+                           uint32_t pkt_need_commit)
 {
     for (int i = 0; i < 4; i++) {
         int xv = sextract64(RxxV, i * 16, 16);
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 004d91f108..a87f7e7130 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -27,6 +27,7 @@ 
 #include "insn.h"
 #include "decode.h"
 #include "translate.h"
+#include "genptr.h"
 #include "printinsn.h"
 
 #include "analyze_funcs_generated.c.inc"
@@ -336,6 +337,58 @@  static void mark_implicit_pred_writes(DisasContext *ctx)
     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
 }
 
+static bool pkt_raises_exception(Packet *pkt)
+{
+    if (check_for_attrib(pkt, A_LOAD) ||
+        check_for_attrib(pkt, A_STORE)) {
+        return true;
+    }
+    return false;
+}
+
+static bool need_commit(DisasContext *ctx)
+{
+    Packet *pkt = ctx->pkt;
+
+    /*
+     * If the short-circuit property is set to false, we'll always do the commit
+     */
+    if (!ctx->short_circuit) {
+        return true;
+    }
+
+    if (pkt_raises_exception(pkt)) {
+        return true;
+    }
+
+    /* Registers with immutability flags require new_value */
+    for (int i = 0; i < ctx->reg_log_idx; i++) {
+        int rnum = ctx->reg_log[i];
+        if (reg_immut_masks[rnum]) {
+            return true;
+        }
+    }
+
+    /* Floating point instructions are hard-coded to use new_value */
+    if (check_for_attrib(pkt, A_FPOP)) {
+        return true;
+    }
+
+    if (pkt->num_insns == 1) {
+        return false;
+    }
+
+    /* Check for overlap between register reads and writes */
+    for (int i = 0; i < ctx->reg_log_idx; i++) {
+        int rnum = ctx->reg_log[i];
+        if (test_bit(rnum, ctx->regs_read)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
 {
     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
@@ -365,6 +418,8 @@  static void analyze_packet(DisasContext *ctx)
         mark_implicit_pred_writes(ctx);
         mark_implicit_pred_reads(ctx);
     }
+
+    ctx->need_commit = need_commit(ctx);
 }
 
 static void gen_start_packet(DisasContext *ctx)
@@ -434,7 +489,8 @@  static void gen_start_packet(DisasContext *ctx)
     }
 
     /* Preload the predicated registers into hex_new_value[i] */
-    if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
+    if (ctx->need_commit &&
+        !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
         int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
         while (i < TOTAL_PER_THREAD_REGS) {
             tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
@@ -541,6 +597,11 @@  static void gen_reg_writes(DisasContext *ctx)
 {
     int i;
 
+    /* Early exit if not needed */
+    if (!ctx->need_commit) {
+        return;
+    }
+
     for (i = 0; i < ctx->reg_log_idx; i++) {
         int reg_num = ctx->reg_log[i];
 
@@ -919,6 +980,7 @@  static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
                                           CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
+    HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr);
     uint32_t hex_flags = dcbase->tb->flags;
 
     ctx->mem_idx = MMU_USER_IDX;
@@ -927,6 +989,7 @@  static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
     ctx->num_hvx_insns = 0;
     ctx->branch_cond = TCG_COND_NEVER;
     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
+    ctx->short_circuit = hex_cpu->short_circuit;
 }
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index c73d792580..e259ea3d03 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -287,6 +287,8 @@  def gen_helper_function(f, tag, tagregs, tagimms):
 
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", uint32_t pkt_has_multi_cof")
+        if (hex_common.need_pkt_need_commit(tag)):
+            f.write(", uint32_t pkt_need_commit")
 
         if hex_common.need_PC(tag):
             if i > 0:
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 187cd6e04e..c5ecb85294 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -86,6 +86,8 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
             def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1
             if hex_common.need_pkt_has_multi_cof(tag):
                 def_helper_size += 1
+            if hex_common.need_pkt_need_commit(tag):
+                def_helper_size += 1
             if hex_common.need_part1(tag):
                 def_helper_size += 1
             if hex_common.need_slot(tag):
@@ -103,6 +105,8 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
             def_helper_size = len(regs) + len(imms) + numscalarreadwrite
             if hex_common.need_pkt_has_multi_cof(tag):
                 def_helper_size += 1
+            if hex_common.need_pkt_need_commit(tag):
+                def_helper_size += 1
             if hex_common.need_part1(tag):
                 def_helper_size += 1
             if hex_common.need_slot(tag):
@@ -156,10 +160,12 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
         for immlett, bits, immshift in imms:
             f.write(", s32")
 
-        ## Add the arguments for the instruction pkt_has_multi_cof, slot and
-        ## part1 (if needed)
+        ## Add the arguments for the instruction pkt_has_multi_cof,
+        ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed)
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", i32")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write(', i32')
         if hex_common.need_PC(tag):
             f.write(", i32")
         if hex_common.helper_needs_next_PC(tag):
diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py
index afe68bdb6f..b7f2df0f36 100644
--- a/target/hexagon/gen_idef_parser_funcs.py
+++ b/target/hexagon/gen_idef_parser_funcs.py
@@ -109,6 +109,13 @@  def main():
                 continue
             if "A_COF" in hex_common.attribdict[tag]:
                 continue
+            ## Skip instructions that are incompatible with short-circuit
+            ## packet register writes
+            if ( tag == 'S2_insert' or
+                 tag == 'S2_insert_rp' or
+                 tag == 'S2_asr_r_svw_trun' or
+                 tag == 'A2_swiz' ):
+                continue
 
             regs = tagregs[tag]
             imms = tagimms[tag]
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index d9ccbe63f6..0e45d43685 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -550,6 +550,9 @@  def gen_tcg_func(f, tag, regs, imms):
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write("    TCGv pkt_has_multi_cof = ")
             f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write("    TCGv pkt_need_commit = ")
+            f.write("tcg_constant_tl(ctx->need_commit);\n")
         if hex_common.need_part1(tag):
             f.write("    TCGv part1 = tcg_constant_tl(insn->part1);\n")
         if hex_common.need_slot(tag):
@@ -596,6 +599,8 @@  def gen_tcg_func(f, tag, regs, imms):
 
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", pkt_has_multi_cof")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write(", pkt_need_commit")
         if hex_common.need_PC(tag):
             f.write(", PC")
         if hex_common.helper_needs_next_PC(tag):
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 232c6e2c20..29c0508f66 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -276,6 +276,9 @@  def need_pkt_has_multi_cof(tag):
     return "A_COF" in attribdict[tag]
 
 
+def need_pkt_need_commit(tag):
+    return 'A_IMPLICIT_WRITES_USR' in attribdict[tag]
+
 def need_condexec_reg(tag, regs):
     if "A_CONDEXEC" in attribdict[tag]:
         for regtype, regid, toss, numregs in regs: