diff mbox series

Hexagon (translate.c): avoid redundant PC updates on COF

Message ID fc059153c3f0526d97b7f13450c02b276b0908e1.1679519341.git.quic_mathbern@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (translate.c): avoid redundant PC updates on COF | expand

Commit Message

Matheus Tavares Bernardino March 22, 2023, 9:17 p.m. UTC
When there is a conditional change of flow or an endloop instruction, we
preload HEX_REG_PC with ctx->next_PC at gen_start_packet(). Nonetheless,
we still generate TCG code to do this update again at gen_goto_tb() when
the condition for the COF is not met, thus producing redundant
instructions. This can be seen with the following packet:

 0x004002e4:  0x5c20d000 {       if (!P0) jump:t PC+0 }

Which generates this TCG code:

   ---- 004002e4
-> mov_i32 pc,$0x4002e8
   and_i32 loc9,p0,$0x1
   mov_i32 branch_taken,loc9
   add_i32 pkt_cnt,pkt_cnt,$0x2
   add_i32 insn_cnt,insn_cnt,$0x2
   brcond_i32 branch_taken,$0x0,ne,$L1
   goto_tb $0x0
   mov_i32 pc,$0x4002e4
   exit_tb $0x7fb0c36e5200
   set_label $L1
   goto_tb $0x1
-> mov_i32 pc,$0x4002e8
   exit_tb $0x7fb0c36e5201
   set_label $L0
   exit_tb $0x7fb0c36e5203

Note that even after optimizations, the redundant PC update is still
present:

   ---- 004002e4
-> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
   mov_i32 branch_taken,$0x1                sync: 0  dead: 0 1  pref=0xffff
   add_i32 pkt_cnt,pkt_cnt,$0x2             sync: 0  dead: 0 1  pref=0xffff
   add_i32 insn_cnt,insn_cnt,$0x2           sync: 0  dead: 0 1 2  pref=0xffff
   goto_tb $0x1
-> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
   exit_tb $0x7fb0c36e5201
   set_label $L0
   exit_tb $0x7fb0c36e5203

With this patch, the second redundant update is properly discarded.

Note that we need the additional "move_to_pc" flag instead of just
avoiding the update whenever `dest == ctx->next_PC`, as that could
potentially skip the update for a COF whose condition is met and whose
ctx->branch_dest just happens to be equal to ctx->next_PC.

Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
---
 target/hexagon/translate.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Anton Johansson March 23, 2023, 2:15 p.m. UTC | #1
On 3/22/23 22:17, Matheus Tavares Bernardino wrote:
> When there is a conditional change of flow or an endloop instruction, we
> preload HEX_REG_PC with ctx->next_PC at gen_start_packet(). Nonetheless,
> we still generate TCG code to do this update again at gen_goto_tb() when
> the condition for the COF is not met, thus producing redundant
> instructions. This can be seen with the following packet:
>
>   0x004002e4:  0x5c20d000 {       if (!P0) jump:t PC+0 }
>
> Which generates this TCG code:
>
>     ---- 004002e4
> -> mov_i32 pc,$0x4002e8
>     and_i32 loc9,p0,$0x1
>     mov_i32 branch_taken,loc9
>     add_i32 pkt_cnt,pkt_cnt,$0x2
>     add_i32 insn_cnt,insn_cnt,$0x2
>     brcond_i32 branch_taken,$0x0,ne,$L1
>     goto_tb $0x0
>     mov_i32 pc,$0x4002e4
>     exit_tb $0x7fb0c36e5200
>     set_label $L1
>     goto_tb $0x1
> -> mov_i32 pc,$0x4002e8
>     exit_tb $0x7fb0c36e5201
>     set_label $L0
>     exit_tb $0x7fb0c36e5203
>
> Note that even after optimizations, the redundant PC update is still
> present:
>
>     ---- 004002e4
> -> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
>     mov_i32 branch_taken,$0x1                sync: 0  dead: 0 1  pref=0xffff
>     add_i32 pkt_cnt,pkt_cnt,$0x2             sync: 0  dead: 0 1  pref=0xffff
>     add_i32 insn_cnt,insn_cnt,$0x2           sync: 0  dead: 0 1 2  pref=0xffff
>     goto_tb $0x1
> -> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
>     exit_tb $0x7fb0c36e5201
>     set_label $L0
>     exit_tb $0x7fb0c36e5203
>
> With this patch, the second redundant update is properly discarded.
>
> Note that we need the additional "move_to_pc" flag instead of just
> avoiding the update whenever `dest == ctx->next_PC`, as that could
> potentially skip updates from a COF with met condition, whose
> ctx->branch_dest just happens to be equal to ctx->next_PC.
>
> Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> ---
>   target/hexagon/translate.c | 21 +++++++++++++--------
>   1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git target/hexagon/translate.c target/hexagon/translate.c
> index 665476ab48..58d638f734 100644
> --- target/hexagon/translate.c
> +++ target/hexagon/translate.c

Looks good, I appreciate the thorough motivation for this patch!

Reviewed-by: Anton Johansson <anjo@rev.ng>
Taylor Simpson March 23, 2023, 6:52 p.m. UTC | #2
> -----Original Message-----
> From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> Sent: Wednesday, March 22, 2023 3:17 PM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson <tsimpson@quicinc.com>; richard.henderson@linaro.org;
> anjo@rev.ng
> Subject: [PATCH] Hexagon (translate.c): avoid redundant PC updates on COF
> 
> When there is a conditional change of flow or an endloop instruction, we
> preload HEX_REG_PC with ctx->next_PC at gen_start_packet().
> Nonetheless, we still generate TCG code to do this update again at
> gen_goto_tb() when the condition for the COF is not met, thus producing
> redundant instructions. This can be seen with the following packet:
> 
>  0x004002e4:  0x5c20d000 {       if (!P0) jump:t PC+0 }
> 
> Which generates this TCG code:
> 
>    ---- 004002e4
> -> mov_i32 pc,$0x4002e8
>    and_i32 loc9,p0,$0x1
>    mov_i32 branch_taken,loc9
>    add_i32 pkt_cnt,pkt_cnt,$0x2
>    add_i32 insn_cnt,insn_cnt,$0x2
>    brcond_i32 branch_taken,$0x0,ne,$L1
>    goto_tb $0x0
>    mov_i32 pc,$0x4002e4
>    exit_tb $0x7fb0c36e5200
>    set_label $L1
>    goto_tb $0x1
> -> mov_i32 pc,$0x4002e8
>    exit_tb $0x7fb0c36e5201
>    set_label $L0
>    exit_tb $0x7fb0c36e5203
> 
> Note that even after optimizations, the redundant PC update is still
> present:
> 
>    ---- 004002e4
> -> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
>    mov_i32 branch_taken,$0x1                sync: 0  dead: 0 1  pref=0xffff
>    add_i32 pkt_cnt,pkt_cnt,$0x2             sync: 0  dead: 0 1  pref=0xffff
>    add_i32 insn_cnt,insn_cnt,$0x2           sync: 0  dead: 0 1 2  pref=0xffff
>    goto_tb $0x1
> -> mov_i32 pc,$0x4002e8                     sync: 0  dead: 0 1  pref=0xffff
>    exit_tb $0x7fb0c36e5201
>    set_label $L0
>    exit_tb $0x7fb0c36e5203
> 
> With this patch, the second redundant update is properly discarded.
> 
> Note that we need the additional "move_to_pc" flag instead of just avoiding
> the update whenever `dest == ctx->next_PC`, as that could potentially skip
> updates from a COF with met condition, whose
> ctx->branch_dest just happens to be equal to ctx->next_PC.
> 
> Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> ---
>  target/hexagon/translate.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
diff mbox series

Patch

diff --git target/hexagon/translate.c target/hexagon/translate.c
index 665476ab48..58d638f734 100644
--- target/hexagon/translate.c
+++ target/hexagon/translate.c
@@ -128,14 +128,19 @@  static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
     return translator_use_goto_tb(&ctx->base, dest);
 }
 
-static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
+static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool
+                        move_to_pc)
 {
     if (use_goto_tb(ctx, dest)) {
         tcg_gen_goto_tb(idx);
-        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        if (move_to_pc) {
+            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        }
         tcg_gen_exit_tb(ctx->base.tb, idx);
     } else {
-        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        if (move_to_pc) {
+            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        }
         tcg_gen_lookup_and_goto_ptr();
     }
 }
@@ -150,11 +155,11 @@  static void gen_end_tb(DisasContext *ctx)
         if (ctx->branch_cond != TCG_COND_ALWAYS) {
             TCGLabel *skip = gen_new_label();
             tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
-            gen_goto_tb(ctx, 0, ctx->branch_dest);
+            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
             gen_set_label(skip);
-            gen_goto_tb(ctx, 1, ctx->next_PC);
+            gen_goto_tb(ctx, 1, ctx->next_PC, false);
         } else {
-            gen_goto_tb(ctx, 0, ctx->branch_dest);
+            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
         }
     } else if (ctx->is_tight_loop &&
                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
@@ -165,9 +170,9 @@  static void gen_end_tb(DisasContext *ctx)
         TCGLabel *skip = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
-        gen_goto_tb(ctx, 0, ctx->base.tb->pc);
+        gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
         gen_set_label(skip);
-        gen_goto_tb(ctx, 1, ctx->next_PC);
+        gen_goto_tb(ctx, 1, ctx->next_PC, false);
     } else {
         tcg_gen_lookup_and_goto_ptr();
     }