diff mbox series

[7/8] Hexagon (target/hexagon) Use direct block chaining for direct jump/branch

Message ID 20221019223739.3868-8-tsimpson@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (target/hexagon) Improve change-of-flow | expand

Commit Message

Taylor Simpson Oct. 19, 2022, 10:37 p.m. UTC
Direct block chaining is documented here:
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining

Recall that Hexagon allows packets with multiple jumps, where only the first
one with a true predicate will actually jump.  So, we can only use direct
block chaining when the packet contains a single PC-relative jump.  We add
the following fields to DisasContext in order to perform direct block
chaining at the end of packet commit (in gen_end_tb):
    has_single_direct_branch
        Indicates that we can use direct block chaining
    branch_cond
        The condition under which the branch is taken
    branch_dest
        The destination of the branch

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/translate.h |  3 +++
 target/hexagon/genptr.c    | 13 ++++++-------
 target/hexagon/translate.c | 39 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 8 deletions(-)

Comments

Matheus Tavares Bernardino Oct. 20, 2022, 3:24 p.m. UTC | #1
Taylor Simpson <tsimpson@quicinc.com> wrote:
>
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index eae358cf33..e60dbf0e7a 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -54,6 +54,9 @@ typedef struct DisasContext {
>      bool qreg_is_predicated[NUM_QREGS];
>      int qreg_log_idx;
>      bool pre_commit;
> +    bool has_single_direct_branch;
> +    TCGv branch_cond;
> +    target_ulong branch_dest;
>  } DisasContext;
>  
>  static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
> diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
> index fba76d3b38..07b4326e56 100644
> --- a/target/hexagon/genptr.c
> +++ b/target/hexagon/genptr.c
> @@ -505,15 +505,14 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, Packet *pkt,
>              gen_set_label(pred_false);
>          }
>      } else {
> -        TCGLabel *pred_false = NULL;
> +        /* Defer this jump to the end of the TB */
> +        g_assert(ctx->branch_cond == NULL);
> +        ctx->has_single_direct_branch = true;
>          if (pred != NULL) {
> -            pred_false = gen_new_label();
> -            tcg_gen_brcondi_tl(TCG_COND_EQ, pred, 0, pred_false);
> -        }
> -        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
> -        if (pred != NULL) {
> -            gen_set_label(pred_false);
> +            ctx->branch_cond = tcg_temp_local_new();
> +            tcg_gen_mov_tl(ctx->branch_cond, pred);
>          }
> +        ctx->branch_dest = dest;
>      }
>  }

Do we want to perform this logic at gen_write_new_pc_addr() as well?

Although, in that case, we would need a separate ctx->branch_dest to
hold a TCGv instead of target_ulong...

Or have a single variable (TCGv) but add an extra
tcg_gen_addi(ctx->branch_dest, tcg_gen_constant_tl(pkt->pc), pc_off)
call to gen_write_new_pc_pcrel(). (In which case, we could also 
unify the two gen_write_new_pc_* functions and have one as a thin
wrapper around the other.) I am not sure how much extra overhead the
tcg_gen_addi() call would add, though.
Taylor Simpson Oct. 20, 2022, 4:56 p.m. UTC | #2
> -----Original Message-----
> From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> Sent: Thursday, October 20, 2022 10:25 AM
> To: Taylor Simpson <tsimpson@quicinc.com>
> Cc: ale@rev.ng; anjo@rev.ng; Brian Cain <bcain@quicinc.com>;
> philmd@linaro.org; qemu-devel@nongnu.org; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; richard.henderson@linaro.org
> Subject: Re: [PATCH 7/8] Hexagon (target/hexagon) Use direct block chaining
> for direct jump/branch
> 
> 
> Do we want to perform this logic at gen_write_new_pc_addr() as well?
> 
> Although, in that case, we would need a separate ctx->branch_dest to hold a
> TCGv instead of target_ulong...
> 
> Or have a single variable (TCGv) but add an extra tcg_gen_addi(ctx-
> >branch_dest, tcg_gen_constant_tl(pkt->pc), pc_off) call to
> gen_write_new_pc_pcrel(). (In which case, we could also unify the two
> gen_write_new_pc_* functions and have one as a thin wrapper around the
> other.) IDK about the extra overhead from tcg_gen_addi(), though.

We get the best performance from direct block chaining when the destination is a constant at translation time (i.e., a direct branch) because we can use goto_tb + exit_tb.

Take a look at gen_end_tb in translate.c to see how this is ultimately done.  For a single direct branch and a tight loop, we get the best performance.  Otherwise, we use tcg_gen_lookup_and_goto_ptr, which is still better than what we were doing before (an unconditional tcg_gen_exit_tb(NULL, 0)).

Thanks,
Taylor
diff mbox series

Patch

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index eae358cf33..e60dbf0e7a 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -54,6 +54,9 @@  typedef struct DisasContext {
     bool qreg_is_predicated[NUM_QREGS];
     int qreg_log_idx;
     bool pre_commit;
+    bool has_single_direct_branch;
+    TCGv branch_cond;
+    target_ulong branch_dest;
 } DisasContext;
 
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index fba76d3b38..07b4326e56 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -505,15 +505,14 @@  static void gen_write_new_pc_pcrel(DisasContext *ctx, Packet *pkt,
             gen_set_label(pred_false);
         }
     } else {
-        TCGLabel *pred_false = NULL;
+        /* Defer this jump to the end of the TB */
+        g_assert(ctx->branch_cond == NULL);
+        ctx->has_single_direct_branch = true;
         if (pred != NULL) {
-            pred_false = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, pred, 0, pred_false);
-        }
-        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
-        if (pred != NULL) {
-            gen_set_label(pred_false);
+            ctx->branch_cond = tcg_temp_local_new();
+            tcg_gen_mov_tl(ctx->branch_cond, pred);
         }
+        ctx->branch_dest = dest;
     }
 }
 
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 71ad2da682..29e2caaf0f 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -116,10 +116,44 @@  static void gen_exec_counters(DisasContext *ctx)
                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
 }
 
+static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
+{
+    return translator_use_goto_tb(&ctx->base, dest);
+}
+
+static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
+{
+    if (use_goto_tb(ctx, dest)) {
+        tcg_gen_goto_tb(idx);
+        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        tcg_gen_exit_tb(ctx->base.tb, idx);
+    } else {
+        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        tcg_gen_lookup_and_goto_ptr();
+    }
+}
+
 static void gen_end_tb(DisasContext *ctx)
 {
     gen_exec_counters(ctx);
-    tcg_gen_exit_tb(NULL, 0);
+
+    if (ctx->has_single_direct_branch) {
+        if (ctx->branch_cond != NULL) {
+            TCGLabel *skip = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_EQ, ctx->branch_cond, 0, skip);
+            gen_goto_tb(ctx, 0, ctx->branch_dest);
+            gen_set_label(skip);
+            gen_goto_tb(ctx, 1, ctx->next_PC);
+            tcg_temp_free(ctx->branch_cond);
+            ctx->branch_cond = NULL;
+        } else {
+            gen_goto_tb(ctx, 0, ctx->branch_dest);
+        }
+    } else {
+        tcg_gen_lookup_and_goto_ptr();
+    }
+
+    g_assert(ctx->branch_cond == NULL);
     ctx->base.is_jmp = DISAS_NORETURN;
 }
 
@@ -803,6 +837,9 @@  static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
 {
+    DisasContext *ctx = container_of(db, DisasContext, base);
+    ctx->has_single_direct_branch = false;
+    ctx->branch_cond = NULL;
 }
 
 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)