diff mbox series

[v3,10/11] Hexagon (target/hexagon) Use direct block chaining for direct jump/branch

Message ID 20221104192631.29434-11-tsimpson@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (target/hexagon) performance and bug fixes | expand

Commit Message

Taylor Simpson Nov. 4, 2022, 7:26 p.m. UTC
Direct block chaining is documented here
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining

Recall that Hexagon allows packets with multiple jumps where only the first
one with a true predicate will actually jump.  So, this patch uses direct
block chaining (tcg_gen_goto_tb/tcg_gen_exit_tb) only when the packet
contains a single PC-relative jump; in all other cases gen_end_tb uses
tcg_gen_lookup_and_goto_ptr.  We add the following to DisasContext in order
to perform direct block chaining at the end of packet commit (in gen_end_tb):
    has_single_direct_branch
        Indicates that we can use direct block chaining
    branch_cond
        The condition under which the branch is taken
    branch_dest
        The destination of the branch

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/translate.h |  3 +++
 target/hexagon/genptr.c    | 13 ++++++++++++-
 target/hexagon/translate.c | 39 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

Comments

Richard Henderson Nov. 5, 2022, 1:33 a.m. UTC | #1
On 11/5/22 06:26, Taylor Simpson wrote:
> Direct block chaining is documented here
> https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining
> 
> Recall that Hexagon allows packets with multiple jumps where only the first
> one with a true predicate will actually jump.  So, we can only use direct
> block chaining when the packet contains a single PC-relative jump.

Not quite accurate.

Only the first two direct branches can use direct block chaining.  Other exits from the 
translation block could use indirect block chaining (tcg_gen_lookup_and_goto_ptr).  You 
just have to remember which is taken.

That said, this is certainly an improvement.

> +    if (ctx->pkt->pkt_has_multi_cof) {
> +        gen_write_new_pc_addr(ctx, tcg_constant_tl(dest), pred);
> +    } else {
> +        /* Defer this jump to the end of the TB */
> +        g_assert(ctx->branch_cond == NULL);
> +        ctx->has_single_direct_branch = true;
> +        if (pred != NULL) {
> +            ctx->branch_cond = tcg_temp_local_new();
> +            tcg_gen_mov_tl(ctx->branch_cond, pred);
> +        }
> +        ctx->branch_dest = dest;

Perhaps re-use hex_branch_taken as branch_cond?

Anyway,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Taylor Simpson Nov. 6, 2022, 9:46 p.m. UTC | #2
> -----Original Message-----
> From: Richard Henderson <richard.henderson@linaro.org>
> Sent: Friday, November 4, 2022 8:33 PM
> To: Taylor Simpson <tsimpson@quicinc.com>; qemu-devel@nongnu.org
> Cc: philmd@linaro.org; ale@rev.ng; anjo@rev.ng; Brian Cain
> <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>
> Subject: Re: [PATCH v3 10/11] Hexagon (target/hexagon) Use direct block
> chaining for direct jump/branch
> 
> On 11/5/22 06:26, Taylor Simpson wrote:
> > Direct block chaining is documented here
> > https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining
> >
> > Recall that Hexagon allows packets with multiple jumps where only the
> > first one with a true predicate will actually jump.  So, we can only
> > use direct block chaining when the packet contains a single PC-relative
> > jump.
> 
> Not quite accurate.
> 
> Only the first two direct branches can use direct block chaining.  Other exits
> from the translation block could use indirect block chaining
> (tcg_gen_lookup_and_goto_ptr).  You just have to remember which is
> taken.
> 

I'll work on the wording in the commit message.  When there is a single PC-relative branch or jump in the packet, we use tcg_gen_goto_tb/tcg_gen_exit_tb.  Otherwise, we use tcg_gen_lookup_and_goto_ptr.


> That said, this is certainly an improvement.
> 
> > +    if (ctx->pkt->pkt_has_multi_cof) {
> > +        gen_write_new_pc_addr(ctx, tcg_constant_tl(dest), pred);
> > +    } else {
> > +        /* Defer this jump to the end of the TB */
> > +        g_assert(ctx->branch_cond == NULL);
> > +        ctx->has_single_direct_branch = true;
> > +        if (pred != NULL) {
> > +            ctx->branch_cond = tcg_temp_local_new();
> > +            tcg_gen_mov_tl(ctx->branch_cond, pred);
> > +        }
> > +        ctx->branch_dest = dest;
> 
> Perhaps re-use hex_branch_taken as branch_cond?

Good idea.  That will save the allocation/deallocation of the TCGv.  I'll change it to a TCGCond to indicate the comparison to be done (if any).  It will work nicely with your other suggestion to pass the branch condition along.


> 
> Anyway,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> 
> r~
diff mbox series

Patch

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 96509a4da7..0841e8418e 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -57,6 +57,9 @@  typedef struct DisasContext {
     bool qreg_is_predicated[NUM_QREGS];
     int qreg_log_idx;
     bool pre_commit;
+    bool has_single_direct_branch;
+    TCGv branch_cond;
+    target_ulong branch_dest;
 } DisasContext;
 
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 81c2ae464d..235ea9d210 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -482,7 +482,18 @@  static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, TCGv pred)
 static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off, TCGv pred)
 {
     target_ulong dest = ctx->pkt->pc + pc_off;
-    gen_write_new_pc_addr(ctx, tcg_constant_tl(dest), pred);
+    if (ctx->pkt->pkt_has_multi_cof) { /* multi_cof: keep pre-patch path */
+        gen_write_new_pc_addr(ctx, tcg_constant_tl(dest), pred);
+    } else {
+        /* Defer this jump to the end of the TB */
+        g_assert(ctx->branch_cond == NULL); /* no branch already pending */
+        ctx->has_single_direct_branch = true; /* tested by gen_end_tb */
+        if (pred != NULL) { /* NULL pred: gen_end_tb jumps always */
+            ctx->branch_cond = tcg_temp_local_new(); /* freed in gen_end_tb */
+            tcg_gen_mov_tl(ctx->branch_cond, pred); /* copy the predicate */
+        }
+        ctx->branch_dest = dest; /* pkt->pc + pc_off, computed above */
+    }
 }
 
 static void gen_set_usr_field(int field, TCGv val)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index fa6415936c..8c007c6f07 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -116,10 +116,44 @@  static void gen_exec_counters(DisasContext *ctx)
                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
 }
 
+static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
+{ /* Returns what translator_use_goto_tb() reports for ctx->base and dest */
+    return translator_use_goto_tb(&ctx->base, dest);
+}
+
+static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
+{ /* Set PC to dest and leave the TB, via goto_tb slot idx when allowed */
+    if (use_goto_tb(ctx, dest)) { /* direct chaining path */
+        tcg_gen_goto_tb(idx);
+        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        tcg_gen_exit_tb(ctx->base.tb, idx); /* same idx as the goto_tb */
+    } else { /* goto_tb not allowed: write PC, then indirect chaining */
+        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+        tcg_gen_lookup_and_goto_ptr();
+    }
+}
+
 static void gen_end_tb(DisasContext *ctx)
 {
     gen_exec_counters(ctx);
-    tcg_gen_exit_tb(NULL, 0);
+
+    if (ctx->has_single_direct_branch) { /* emit the deferred jump */
+        if (ctx->branch_cond != NULL) { /* predicated branch */
+            TCGLabel *skip = gen_new_label(); /* not-taken path */
+            tcg_gen_brcondi_tl(TCG_COND_EQ, ctx->branch_cond, 0, skip);
+            gen_goto_tb(ctx, 0, ctx->branch_dest); /* taken: slot 0 */
+            gen_set_label(skip);
+            gen_goto_tb(ctx, 1, ctx->next_PC); /* not taken: slot 1 */
+            tcg_temp_free(ctx->branch_cond); /* temp from genptr.c */
+            ctx->branch_cond = NULL; /* keeps the assert below true */
+        } else { /* no predicate: always jump */
+            gen_goto_tb(ctx, 0, ctx->branch_dest);
+        }
+    } else { /* no deferred direct branch */
+        tcg_gen_lookup_and_goto_ptr();
+    }
+
+    g_assert(ctx->branch_cond == NULL); /* no temp left pending */
     ctx->base.is_jmp = DISAS_NORETURN;
 }
 
@@ -811,6 +845,9 @@  static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
 {
+    DisasContext *ctx = container_of(db, DisasContext, base);
+    ctx->has_single_direct_branch = false; /* reset deferred-branch state */
+    ctx->branch_cond = NULL; /* gen_write_new_pc_pcrel asserts this */
 }
 
 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)